#define DEBUG_TYPE "openmp-ir-builder"
    cl::desc("Use optimistic attributes describing "
             "'as-if' properties of runtime calls."),

    "openmp-ir-builder-unroll-threshold-factor", cl::Hidden,
    cl::desc("Factor for the unroll threshold to account for code "
             "simplifications still taking place"),
  if (!IP1.isSet() || !IP2.isSet())
    return false;
  return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
  switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
  case OMPScheduleType::UnorderedStaticChunked:
  case OMPScheduleType::UnorderedStatic:
  case OMPScheduleType::UnorderedDynamicChunked:
  case OMPScheduleType::UnorderedGuidedChunked:
  case OMPScheduleType::UnorderedRuntime:
  case OMPScheduleType::UnorderedAuto:
  case OMPScheduleType::UnorderedTrapezoidal:
  case OMPScheduleType::UnorderedGreedy:
  case OMPScheduleType::UnorderedBalanced:
  case OMPScheduleType::UnorderedGuidedIterativeChunked:
  case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::UnorderedSteal:
  case OMPScheduleType::UnorderedStaticBalancedChunked:
  case OMPScheduleType::UnorderedGuidedSimd:
  case OMPScheduleType::UnorderedRuntimeSimd:
  case OMPScheduleType::OrderedStaticChunked:
  case OMPScheduleType::OrderedStatic:
  case OMPScheduleType::OrderedDynamicChunked:
  case OMPScheduleType::OrderedGuidedChunked:
  case OMPScheduleType::OrderedRuntime:
  case OMPScheduleType::OrderedAuto:
  case OMPScheduleType::OrderdTrapezoidal:
  case OMPScheduleType::NomergeUnorderedStaticChunked:
  case OMPScheduleType::NomergeUnorderedStatic:
  case OMPScheduleType::NomergeUnorderedDynamicChunked:
  case OMPScheduleType::NomergeUnorderedGuidedChunked:
  case OMPScheduleType::NomergeUnorderedRuntime:
  case OMPScheduleType::NomergeUnorderedAuto:
  case OMPScheduleType::NomergeUnorderedTrapezoidal:
  case OMPScheduleType::NomergeUnorderedGreedy:
  case OMPScheduleType::NomergeUnorderedBalanced:
  case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
  case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
  case OMPScheduleType::NomergeUnorderedSteal:
  case OMPScheduleType::NomergeOrderedStaticChunked:
  case OMPScheduleType::NomergeOrderedStatic:
  case OMPScheduleType::NomergeOrderedDynamicChunked:
  case OMPScheduleType::NomergeOrderedGuidedChunked:
  case OMPScheduleType::NomergeOrderedRuntime:
  case OMPScheduleType::NomergeOrderedAuto:
  case OMPScheduleType::NomergeOrderedTrapezoidal:
  case OMPScheduleType::OrderedDistributeChunked:
  case OMPScheduleType::OrderedDistribute:
      SchedType & OMPScheduleType::MonotonicityMask;
  if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)

  Builder.restoreIP(IP);
      Kernel->getFnAttribute("target-features").getValueAsString();
  if (Features.count("+wavefrontsize64"))
                                          bool HasSimdModifier,
                                          bool HasDistScheduleChunks) {
  switch (ClauseKind) {
  case OMP_SCHEDULE_Default:
  case OMP_SCHEDULE_Static:
    return HasChunks ? OMPScheduleType::BaseStaticChunked
                     : OMPScheduleType::BaseStatic;
  case OMP_SCHEDULE_Dynamic:
    return OMPScheduleType::BaseDynamicChunked;
  case OMP_SCHEDULE_Guided:
    return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
                           : OMPScheduleType::BaseGuidedChunked;
  case OMP_SCHEDULE_Auto:
    return OMPScheduleType::BaseAuto;
  case OMP_SCHEDULE_Runtime:
    return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
                           : OMPScheduleType::BaseRuntime;
  case OMP_SCHEDULE_Distribute:
    return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
                                 : OMPScheduleType::BaseDistribute;
                                               bool HasOrderedClause) {
  assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
             OMPScheduleType::None &&
         "Must not have ordering nor monotonicity flags already set");

  OMPScheduleType OrderingModifier = HasOrderedClause
                                         ? OMPScheduleType::ModifierOrdered
                                         : OMPScheduleType::ModifierUnordered;
  OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;

  if (OrderingScheduleType ==
      (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedGuidedChunked;
  else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
                                    OMPScheduleType::ModifierOrdered))
    return OMPScheduleType::OrderedRuntime;

  return OrderingScheduleType;
                                                   bool HasSimdModifier,
                                                   bool HasMonotonic,
                                                   bool HasNonmonotonic,
                                                   bool HasOrderedClause) {
  assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
             OMPScheduleType::None &&
         "Must not have monotonicity flags already set");
  assert((!HasMonotonic || !HasNonmonotonic) &&
         "Monotonic and Nonmonotonic are contradicting each other");

  if (HasMonotonic) {
    return ScheduleType | OMPScheduleType::ModifierMonotonic;
  } else if (HasNonmonotonic) {
    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
    if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
        (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
        HasSimdModifier || HasOrderedClause)
      return ScheduleType;
    return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
    bool HasSimdModifier, bool HasMonotonicModifier,
    bool HasNonmonotonicModifier, bool HasOrderedClause,
    bool HasDistScheduleChunks) {
      ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
      OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause);
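// Illustrative (not in the original source): for `schedule(dynamic)` with no
// modifiers and no ordered clause, the pipeline above composes
//   BaseDynamicChunked -> UnorderedDynamicChunked
//   -> UnorderedDynamicChunked | ModifierNonmonotonic,
// since the non-monotonic default applies only to non-static, non-simd,
// non-ordered schedules.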
  assert(!Br->isConditional() &&
         "BB's terminator must be an unconditional branch (or degenerate)");

  Br->setSuccessor(0, Target);

  NewBr->setDebugLoc(DL);

  assert(New->getFirstInsertionPt() == New->begin() &&
         "Target BB must not have PHI nodes");

  New->splice(New->begin(), Old, IP.getPoint(), Old->end());

  NewBr->setDebugLoc(DL);
  Builder.SetInsertPoint(Old);

  Builder.SetCurrentDebugLocation(DebugLoc);

  New->replaceSuccessorsPhiUsesWith(Old, New);

    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());

    Builder.SetInsertPoint(Builder.GetInsertBlock());

  Builder.SetCurrentDebugLocation(DebugLoc);

    Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());

    Builder.SetInsertPoint(Builder.GetInsertBlock());

  Builder.SetCurrentDebugLocation(DebugLoc);
                               const Twine &Name = "", bool AsPtr = true,
                               bool Is64Bit = false) {
  Builder.restoreIP(OuterAllocaIP);

      Builder.CreateAlloca(IntTy, nullptr, Name + ".addr");

    FakeVal = FakeValAddr;

    FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name + ".val");

  Builder.restoreIP(InnerAllocaIP);

    UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name + ".use");

        FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
enum OpenMPOffloadingRequiresDirFlags {
  OMP_REQ_UNDEFINED = 0x000,
  OMP_REQ_NONE = 0x001,
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
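// Illustrative (not in the original source): the flags combine as a bitmask,
// e.g. `requires reverse_offload unified_shared_memory` yields
//   OMP_REQ_REVERSE_OFFLOAD | OMP_REQ_UNIFIED_SHARED_MEMORY == 0x00A.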
      : RequiresFlags(OMP_REQ_UNDEFINED) {}

      bool HasRequiresReverseOffload, bool HasRequiresUnifiedAddress,
      bool HasRequiresUnifiedSharedMemory, bool HasRequiresDynamicAllocators)
      RequiresFlags(OMP_REQ_UNDEFINED) {
    if (HasRequiresReverseOffload)
      RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
    if (HasRequiresUnifiedAddress)
      RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
    if (HasRequiresUnifiedSharedMemory)
      RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
    if (HasRequiresDynamicAllocators)
      RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;

    return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;

    return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;

    return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;

    return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;

               : static_cast<int64_t>(OMP_REQ_NONE);
      RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
      RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;

      RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
      RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;

      RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
      RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;

      RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
      RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;

  constexpr size_t MaxDim = 3;

  Value *DynCGroupMemFallbackFlag =
  DynCGroupMemFallbackFlag = Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
  Value *Flags = Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);

  Value *NumThreads3D =
  auto FnAttrs = Attrs.getFnAttrs();
  auto RetAttrs = Attrs.getRetAttrs();

  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)

                        bool Param = true) -> void {
    bool HasSignExt = AS.hasAttribute(Attribute::SExt);
    bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
    if (HasSignExt || HasZeroExt) {
      assert(AS.getNumAttributes() == 1 &&
             "Currently not handling extension attr combined with others.");

      if (auto AK = TargetLibraryInfo::getExtAttrForI32Param(T, HasSignExt))

          TargetLibraryInfo::getExtAttrForI32Return(T, HasSignExt))
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
    FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet);                           \
    addAttrSet(RetAttrs, RetAttrSet, false);                                   \
    for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo)                \
      addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]);                         \
    Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs));    \
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \
    Fn = M.getFunction(Str);                                                   \
#include "llvm/Frontend/OpenMP/OMPKinds.def"

#define OMP_RTL(Enum, Str, ...)                                               \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \
#include "llvm/Frontend/OpenMP/OMPKinds.def"
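// Illustrative (not in the original source): for OMPRTL___kmpc_global_thread_num
// the declaration materialized here looks roughly like
//   declare i32 @__kmpc_global_thread_num(ptr)
// where the pointer argument is the ident_t source-location descriptor.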
  if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {

            LLVMContext::MD_callback,

                2, {-1, -1}, true)}));

  assert(Fn && "Failed to create OpenMP runtime function");
  Builder.SetInsertPoint(FiniBB);

    FiniBB = OtherFiniBB;

  Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());

  auto EndIt = FiniBB->end();
  if (FiniBB->size() >= 1)
    if (auto Prev = std::prev(EndIt); Prev->isTerminator())

    FiniBB->replaceAllUsesWith(OtherFiniBB);
    FiniBB->eraseFromParent();
    FiniBB = OtherFiniBB;

  assert(Fn && "Failed to create OpenMP runtime function pointer");
  for (auto Inst = Block->getReverseIterator()->begin();
       Inst != Block->getReverseIterator()->end();) {

      Block.getParent()->getEntryBlock().getTerminator()->getIterator();
  ParallelRegionBlockSet.clear();

  OI.collectBlocks(ParallelRegionBlockSet, Blocks);

  bool ArgsInZeroAddressSpace = Config.isTargetDevice();

                          ".omp_par", ArgsInZeroAddressSpace);

                    << " Exit: " << OI.ExitBB->getName() << "\n");
  assert(Extractor.isEligible() &&
         "Expected OpenMP outlining to be possible!");

  for (auto *V : OI.ExcludeArgsFromAggregate)
    Extractor.excludeArgFromAggregate(V);

  Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);
  if (TargetCpuAttr.isStringAttribute())

  auto TargetFeaturesAttr = OuterFn->getFnAttribute("target-features");
  if (TargetFeaturesAttr.isStringAttribute())
    OutlinedFn->addFnAttr(TargetFeaturesAttr);

  LLVM_DEBUG(dbgs() << " Outlined function: " << *OutlinedFn << "\n");

         "OpenMP outlined functions should not return a value!");

  M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);

           "Expected instructions to add in the outlined region entry");

              End = ArtificialEntry.rend();

      if (I.isTerminator()) {

        if (OI.EntryBB->getTerminator())
          OI.EntryBB->getTerminator()->adoptDbgRecords(
              &ArtificialEntry, I.getIterator(), false);

      I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());

    OI.EntryBB->moveBefore(&ArtificialEntry);

  if (OI.PostOutlineCB)
    OI.PostOutlineCB(*OutlinedFn);
  if (OI.FixUpNonEntryAllocas) {

      errs() << "Error of kind: " << Kind
             << " when emitting offload entries and metadata during "
                "OMPIRBuilder finalization\n";
  if (Config.EmitLLVMUsedMetaInfo.value_or(false)) {
    std::vector<WeakTrackingVH> LLVMCompilerUsed = {
        M.getGlobalVariable("__openmp_nvptx_data_transfer_temporary_storage")};
    emitUsed("llvm.compiler.used", LLVMCompilerUsed);
                                 ConstantInt::get(I32Ty, Value), Name);

  for (unsigned I = 0, E = List.size(); I != E; ++I)

  if (UsedArray.empty())

  GV->setSection("llvm.metadata");

  auto *Int8Ty = Builder.getInt8Ty();

      ConstantInt::get(Int8Ty, Mode), Twine(KernelName, "_exec_mode"));
                                              unsigned Reserve2Flags) {
  LocFlags |= OMP_IDENT_FLAG_KMPC;

      ConstantInt::get(Int32, uint32_t(LocFlags)),
      ConstantInt::get(Int32, Reserve2Flags),
      ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};

  size_t SrcLocStrArgIdx = 4;
  if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)

        SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));

    if (GV.getValueType() == OpenMPIRBuilder::Ident && GV.hasInitializer())
      if (GV.getInitializer() == Initializer)

        M, OpenMPIRBuilder::Ident,

        M.getDataLayout().getDefaultGlobalsAddressSpace());
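// Illustrative (not in the original source): the resulting ident_t global
// looks roughly like
//   @0 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 2, i32 0, i32 23, ptr @.str }
// with i32 2 being OMP_IDENT_FLAG_KMPC and @.str the source-location string.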
  SrcLocStrSize = LocStr.size();

    if (GV.isConstant() && GV.hasInitializer() &&
        GV.getInitializer() == Initializer)

    SrcLocStr = Builder.CreateGlobalString(
        LocStr, "", M.getDataLayout().getDefaultGlobalsAddressSpace(),

                                                unsigned Line, unsigned Column,

  Buffer.append(FunctionName);

  Buffer.append(std::to_string(Line));

  Buffer.append(std::to_string(Column));

  StringRef UnknownLoc = ";unknown;unknown;0;0;;";
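// Illustrative (not in the original source): the buffer assembled above
// follows the libomp source-location string format
//   ";<file>;<function>;<line>;<column>;;"
// e.g. ";main.c;foo;12;3;;"; UnknownLoc is the all-unknown fallback.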
    if (DIFile *DIF = DIL->getFile())
      if (std::optional<StringRef> Source = DIF->getSource())

                             DIL->getColumn(), SrcLocStrSize);

                           Loc.IP.getBlock()->getParent());

                            "omp_global_thread_num");

                                                bool ForceSimpleCall,
                                                bool CheckCancelFlag) {

    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;

    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;

    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;

    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;

    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;

  bool UseCancelBarrier =

                            ? OMPRTL___kmpc_cancel_barrier
                            : OMPRTL___kmpc_barrier),

  if (UseCancelBarrier && CheckCancelFlag)
                                         omp::Directive CanceledDirective) {

  auto *UI = Builder.CreateUnreachable();

    Builder.SetInsertPoint(ElseTI);
    auto ElseIP = Builder.saveIP();

  Builder.SetInsertPoint(ThenTI);

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();
                                         omp::Directive CanceledDirective) {

  auto *UI = Builder.CreateUnreachable();

  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();
  auto *KernelArgsPtr =
      Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs, nullptr, "kernel_args");

        Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr, I);

        M.getDataLayout().getPrefTypeAlign(KernelArgs[I]->getType()));

                     NumThreads, HostPtr, KernelArgsPtr};

  assert(OutlinedFnID && "Invalid outlined function ID!");

  Value *Return = nullptr;

      Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
      Args.NumThreads.front(), OutlinedFnID, ArgsVector));

  Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

  auto CurFn = Builder.GetInsertBlock()->getParent();

  emitBlock(OffloadContBlock, CurFn, true);
    Value *CancelFlag, omp::Directive CanceledDirective) {
         "Unexpected cancellation!");

  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,

  Builder.SetInsertPoint(CancellationBlock);
  Builder.CreateBr(*FiniBBOrErr);

  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
  OutlinedFn.addFnAttr(Attribute::NoUnwind);

         "Expected at least tid and bounded tid as arguments");
  unsigned NumCapturedVars = OutlinedFn.arg_size() - 2;

  assert(CI && "Expected call instruction to outlined function");
  CI->getParent()->setName("omp_parallel");

  Builder.SetInsertPoint(CI);
  Type *PtrTy = OMPIRBuilder->VoidPtr;

  OpenMPIRBuilder::InsertPointTy CurrentIP = Builder.saveIP();

  Value *Args = ArgsAlloca;

    Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
  Builder.restoreIP(CurrentIP);

  for (unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {

    Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(

    Builder.CreateStore(V, StoreAddress);

      IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
                  : Builder.getInt32(1);

  Value *Parallel60CallArgs[] = {

      NumThreads ? NumThreads : Builder.getInt32(-1),
      Builder.getInt32(-1),

      Builder.getInt64(NumCapturedVars),
      Builder.getInt32(0)};

             << *Builder.GetInsertBlock()->getParent() << "\n");

  Builder.SetInsertPoint(PrivTID);

  Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),

    I->eraseFromParent();
  if (!F->hasMetadata(LLVMContext::MD_callback)) {

    F->addMetadata(LLVMContext::MD_callback,

  OutlinedFn.addFnAttr(Attribute::NoUnwind);

         "Expected at least tid and bounded tid as arguments");
  unsigned NumCapturedVars = OutlinedFn.arg_size() - 2;

  CI->getParent()->setName("omp_parallel");
  Builder.SetInsertPoint(CI);

  Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),

  RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));

    Value *Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);

  auto PtrTy = OMPIRBuilder->VoidPtr;
  if (IfCondition && NumCapturedVars == 0) {

             << *Builder.GetInsertBlock()->getParent() << "\n");

  Builder.SetInsertPoint(PrivTID);

  Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),

    I->eraseFromParent();
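// Illustrative (not in the original source): on the host path this lowers a
// parallel region to a variadic runtime call of roughly the form
//   call void (ptr, i32, ptr, ...) @__kmpc_fork_call(
//       ptr @loc, i32 <num captured vars>, ptr @outlined.omp_par, ...)
// with the captured variables appended as trailing arguments.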
                          omp::ProcBindKind ProcBind, bool IsCancellable) {

  bool ArgsInZeroAddressSpace = Config.isTargetDevice();

  if (NumThreads && !Config.isTargetDevice()) {

        Builder.CreateIntCast(NumThreads, Int32, false)};

  if (ProcBind != OMP_PROC_BIND_default) {

        ConstantInt::get(Int32, unsigned(ProcBind), true)};

      Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {

        TIDAddrAlloca, PointerType::get(M.getContext(), 0), "tid.addr.ascast");

        PointerType::get(M.getContext(), 0),
        "zero.addr.ascast");

    if (IP.getBlock()->end() == IP.getPoint()) {

      assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
             IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
             "Unexpected insertion point for finalization call!");

      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");

      Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  assert(BodyGenCB && "Expected body generation callback!");

  if (Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))

  LLVM_DEBUG(dbgs() << "After body codegen: " << *OuterFn << "\n");

  if (Config.isTargetDevice()) {

                        std::move(ToBeDeleted)](Function &OutlinedFn) {
                               IfCondition, NumThreads, PrivTID, PrivTIDAddr,
                               ThreadID, ToBeDeletedVec);

                        std::move(ToBeDeleted)](Function &OutlinedFn) {
                             PrivTID, PrivTIDAddr, ToBeDeletedVec);
1757 ".omp_par", ArgsInZeroAddressSpace);
1762 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1764 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1769 return GV->getValueType() == OpenMPIRBuilder::Ident;
1774 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1780 if (&V == TIDAddr || &V == ZeroAddr) {
1786 for (
Use &U : V.uses())
1788 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1798 if (!V.getType()->isPointerTy()) {
1802 Builder.restoreIP(OuterAllocaIP);
1804 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1808 Builder.SetInsertPoint(InsertBB,
1813 Builder.restoreIP(InnerAllocaIP);
1814 Inner =
Builder.CreateLoad(V.getType(), Ptr);
1817 Value *ReplacementValue =
nullptr;
1820 ReplacementValue = PrivTID;
1823 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
1831 assert(ReplacementValue &&
1832 "Expected copy/create callback to set replacement value!");
1833 if (ReplacementValue == &V)
1838 UPtr->set(ReplacementValue);
  for (Value *Output : Outputs)

         "OpenMP outlining should not produce live-out values!");

  LLVM_DEBUG(dbgs() << "After privatization: " << *OuterFn << "\n");

  for (auto *BB : Blocks)
    dbgs() << " PBR: " << BB->getName() << "\n";

  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

    Builder.CreateBr(*FiniBBOrErr);

    Term->eraseFromParent();

  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  UI->eraseFromParent();
  if (Dependencies.empty())

  Type *DependInfo = OMPBuilder.DependInfo;

  Value *DepArray = nullptr;

    Builder.SetInsertPoint(

    DepArray = Builder.CreateAlloca(DepArrayTy, nullptr, ".dep.arr.addr");

    Builder.restoreIP(OldIP);

    for (const auto &[DepIdx, Dep] : enumerate(Dependencies)) {

          Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);

      Value *Addr = Builder.CreateStructGEP(

          static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
      Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
      Builder.CreateStore(DepValPtr, Addr);

          DependInfo, Base, static_cast<unsigned int>(RTLDependInfoFields::Len));
      Builder.CreateStore(
          Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),

      Value *Flags = Builder.CreateStructGEP(

          static_cast<unsigned int>(RTLDependInfoFields::Flags));
      Builder.CreateStore(
          ConstantInt::get(Builder.getInt8Ty(),
                           static_cast<unsigned int>(Dep.DepKind)),
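// Illustrative (not in the original source): each array slot is populated to
// match the runtime's dependence descriptor, roughly
//   struct kmp_depend_info { intptr_t base_addr; size_t len; uint8_t flags; };
// i.e. the address, the store size of the dependence type, and the kind.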
Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(

  unsigned ProgramAddressSpace = M.getDataLayout().getProgramAddressSpace();

      Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},

      "omp_taskloop_dup", M);

  Value *LastprivateFlagArg = DupFunction->getArg(2);
  DestTaskArg->setName("dest_task");
  SrcTaskArg->setName("src_task");
  LastprivateFlagArg->setName("lastprivate_flag");

  IRBuilderBase::InsertPointGuard Guard(Builder);

  auto GetTaskContextPtrFromArg = [&](Value *Arg) -> Value * {
    Type *TaskWithPrivatesTy =

        TaskWithPrivatesTy, Arg, {Builder.getInt32(0), Builder.getInt32(1)});

        PrivatesTy, TaskPrivates,

  Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
  Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);

  DestTaskContextPtr->setName("destPtr");
  SrcTaskContextPtr->setName("srcPtr");

  Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
      DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
  if (!AfterIPOrError)

  Builder.restoreIP(*AfterIPOrError);
    llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,

    Value *GrainSize, bool NoGroup, int Sched, Value *Final, bool Mergeable,

  uint32_t SrcLocStrSize;

  if (Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))

  llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();

  llvm::CanonicalLoopInfo *CLI = result.get();

  OI.EntryBB = TaskloopAllocaBB;
  OI.OuterAllocaBB = AllocaIP.getBlock();
  OI.ExitBB = TaskloopExitBB;

      Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP, "global.tid", false));

                    TaskloopAllocaIP, "lb", false, true);

                    TaskloopAllocaIP, "ub", false, true);

                    TaskloopAllocaIP, "step", false, true);

  OI.Inputs.insert(FakeLB);
  OI.Inputs.insert(FakeUB);
  OI.Inputs.insert(FakeStep);
  if (TaskContextStructPtrVal)
    OI.Inputs.insert(TaskContextStructPtrVal);
  assert(((TaskContextStructPtrVal && DupCB) ||
          (!TaskContextStructPtrVal && !DupCB)) &&
         "Task context struct ptr and duplication callback must be both set "

  unsigned ProgramAddressSpace = M.getDataLayout().getProgramAddressSpace();

      {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
  Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(

  if (!TaskDupFnOrErr) {

  Value *TaskDupFn = *TaskDupFnOrErr;

  OI.PostOutlineCB = [this, Ident, LBVal, UBVal, StepVal, Untied,
                      TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
                      IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
                      FakeStep, Final, Mergeable,
                      Priority](Function &OutlinedFn) mutable {
    assert(OutlinedFn.hasOneUse() &&
           "there must be a single user for the outlined function");
    IRBuilderBase::InsertPoint CurrentIp = Builder.saveIP();

    Value *CastedLBVal =
        Builder.CreateIntCast(LBVal, Builder.getInt64Ty(), true, "lb64");
    Value *CastedUBVal =
        Builder.CreateIntCast(UBVal, Builder.getInt64Ty(), true, "ub64");
    Value *CastedStepVal =
        Builder.CreateIntCast(StepVal, Builder.getInt64Ty(), true, "step64");

    Builder.SetInsertPoint(StaleCI);

      Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});

        divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));

    AllocaInst *ArgStructAlloca =

    assert(ArgStructAlloca &&
           "Unable to find the alloca instruction corresponding to arguments "
           "for extracted function");
    StructType *ArgStructType =

    assert(ArgStructType && "Unable to find struct type corresponding to "
                            "arguments for extracted function");
    Value *SharedsSize =
        Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));

    CallInst *TaskData = Builder.CreateCall(
        TaskAllocFn, {Ident, ThreadID, Flags,
                      TaskSize, SharedsSize,

      Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
      Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,

        ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(0)});

        ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(1)});

        ArgStructType, TaskShareds, {Builder.getInt32(0), Builder.getInt32(2)});

        IfCond ? Builder.CreateIntCast(IfCond, Builder.getInt32Ty(), true)

    Value *GrainSizeVal =
        GrainSize ? Builder.CreateIntCast(GrainSize, Builder.getInt64Ty(), true)

    Value *TaskDup = TaskDupFn;

    Value *Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
                     Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};

    Builder.CreateCall(TaskloopFn, Args);

      Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
    Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());

    LoadInst *SharedsOutlined =
        Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
    OutlinedFn.getArg(1)->replaceUsesWithIf(
        SharedsOutlined,
        [SharedsOutlined](Use &U) { return U.getUser() != SharedsOutlined; });

    Type *IVTy = IV->getType();

    Value *TaskLB = nullptr;
    Value *TaskUB = nullptr;
    Value *LoadTaskLB = nullptr;
    Value *LoadTaskUB = nullptr;
    for (Instruction &I : *TaskloopAllocaBB) {
      if (I.getOpcode() == Instruction::GetElementPtr) {

          switch (CI->getZExtValue()) {

      } else if (I.getOpcode() == Instruction::Load) {

        if (Load.getPointerOperand() == TaskLB) {
          assert(TaskLB != nullptr && "Expected value for TaskLB");

        } else if (Load.getPointerOperand() == TaskUB) {
          assert(TaskUB != nullptr && "Expected value for TaskUB");

    Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());

    assert(LoadTaskLB != nullptr && "Expected value for LoadTaskLB");
    assert(LoadTaskUB != nullptr && "Expected value for LoadTaskUB");

    Value *TripCountMinusOne =
        Builder.CreateSDiv(Builder.CreateSub(LoadTaskUB, LoadTaskLB), FakeStep);
    Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One, "trip_cnt");
    Value *CastedTripCount = Builder.CreateIntCast(TripCount, IVTy, true);
    Value *CastedTaskLB = Builder.CreateIntCast(LoadTaskLB, IVTy, true);
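    // Illustrative (not in the original source): with the runtime-provided
    // bounds lb and ub being inclusive, the recomputed trip count is
    //   trip_cnt = (ub - lb) / step + 1
    // e.g. lb = 0, ub = 9, step = 1 gives 10 iterations.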
    CLI->setTripCount(CastedTripCount);

    Builder.SetInsertPoint(CLI->getBody(),
                           CLI->getBody()->getFirstInsertionPt());

    assert(CLI->getIndVar()->getNumUses() == 3 &&
           "Canonical loop should have exactly three uses of the ind var");
    for (User *IVUser : CLI->getIndVar()->users()) {

      if (Mul->getOpcode() == Instruction::Mul) {
        for (User *MulUser : Mul->users()) {

          if (Add->getOpcode() == Instruction::Add) {
            Add->setOperand(1, CastedTaskLB);

    FakeLB->replaceAllUsesWith(CastedLBVal);
    FakeUB->replaceAllUsesWith(CastedUBVal);
    FakeStep->replaceAllUsesWith(CastedStepVal);

      I->eraseFromParent();

  Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->begin());
  if (Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))

      Builder, AllocaIP, ToBeDeleted, TaskAllocaIP, "global.tid", false));

  OI.PostOutlineCB = [this, Ident, Tied, Final, IfCondition, Dependencies,
                      Mergeable, Priority, EventHandle, TaskAllocaBB,
                      ToBeDeleted](Function &OutlinedFn) mutable {
    assert(OutlinedFn.hasOneUse() &&
           "there must be a single user for the outlined function");

    bool HasShareds = StaleCI->arg_size() > 1;
    Builder.SetInsertPoint(StaleCI);
      Flags = Builder.CreateOr(FinalFlag, Flags);

        divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));

    assert(ArgStructAlloca &&
           "Unable to find the alloca instruction corresponding to arguments "
           "for extracted function");

    assert(ArgStructType && "Unable to find struct type corresponding to "
                            "arguments for extracted function");

        Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));

        TaskAllocFn, {Ident, ThreadID, Flags,
                      TaskSize, SharedsSize,

          OMPRTL___kmpc_task_allow_completion_event);

          Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,

      EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
      Builder.CreateStore(EventVal, EventHandleAddr);

      Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
      Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,

          Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});

          VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);

          TaskStructType, TaskGEP, {Zero, ConstantInt::get(Int32Ty, 4)});

      Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
                                                   PriorityData, {Zero, Zero});
      Builder.CreateStore(Priority, CmplrData);
        Builder.GetInsertPoint()->getParent()->getTerminator();
    Instruction *ThenTI = IfTerminator, *ElseTI = nullptr;
    Builder.SetInsertPoint(IfTerminator);

      Builder.SetInsertPoint(ElseTI);

      if (Dependencies.size()) {

            {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
             ConstantInt::get(Builder.getInt32Ty(), 0),

    Builder.SetInsertPoint(ThenTI);

    if (Dependencies.size()) {

          {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
           DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),

    Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());

    LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
    OutlinedFn.getArg(1)->replaceUsesWithIf(
        Shareds, [Shareds](Use &U) { return U.getUser() != Shareds; });

      I->eraseFromParent();

  Builder.SetInsertPoint(TaskExitBB, TaskExitBB->begin());

  Builder.SetInsertPoint(TaskgroupExitBB);
  unsigned CaseNumber = 0;
  for (auto SectionCB : SectionCBs) {

        M.getContext(), "omp_section_loop.body.case", CurFn, Continue);

    Builder.SetInsertPoint(CaseBB);

  Value *LB = ConstantInt::get(I32Ty, 0);
  Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
  Value *ST = ConstantInt::get(I32Ty, 1);

      Loc, LoopBodyGenCB, LB, UB, ST, true, false, AllocaIP, "section_loop");

      applyStaticWorkshareLoop(Loc.DL, *LoopInfo, AllocaIP,
                               WorksharingLoopType::ForStaticLoop, !IsNowait);

  assert(LoopFini && "Bad structure of static workshare loop finalization");

    assert(FiniInfo.DK == OMPD_sections &&
           "Unexpected finalization stack state!");
    if (Error Err = FiniInfo.mergeFiniBB(Builder, LoopFini))

    if (IP.getBlock()->end() != IP.getPoint())

    auto *CaseBB = Loc.IP.getBlock();
    auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
    auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);

  Directive OMPD = Directive::OMPD_sections;

  return EmitOMPInlinedRegion(OMPD, nullptr, nullptr, BodyGenCB, FiniCBWrapper,
Value *OpenMPIRBuilder::getGPUThreadID() {

          OMPRTL___kmpc_get_hardware_thread_id_in_block),

Value *OpenMPIRBuilder::getGPUWarpSize() {

Value *OpenMPIRBuilder::getNVPTXWarpID() {
  unsigned LaneIDBits = Log2_32(Config.getGridValue().GV_Warp_Size);
  return Builder.CreateAShr(getGPUThreadID(), LaneIDBits, "nvptx_warp_id");

Value *OpenMPIRBuilder::getNVPTXLaneID() {
  unsigned LaneIDBits = Log2_32(Config.getGridValue().GV_Warp_Size);
  assert(LaneIDBits < 32 && "Invalid LaneIDBits size in NVPTX device.");
  unsigned LaneIDMask = ~0u >> (32u - LaneIDBits);
  return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
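// Illustrative (not in the original source): with the common warp size of 32,
// LaneIDBits = Log2_32(32) = 5 and LaneIDMask = ~0u >> 27 = 0x1f, so
//   lane id = tid & 0x1f   and   warp id = tid >> 5.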
  uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
  uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
  assert(FromSize > 0 && "From size must be greater than zero");
  assert(ToSize > 0 && "To size must be greater than zero");
  if (FromType == ToType)

  if (FromSize == ToSize)
    return Builder.CreateBitCast(From, ToType);

    return Builder.CreateIntCast(From, ToType, true);

  Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
      CastItem, Builder.getPtrTy(0));
  Builder.CreateStore(From, ValCastItem);
  return Builder.CreateLoad(ToType, CastItem);
  uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
  assert(Size <= 8 && "Unsupported bitwidth in shuffle instruction");

  Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);

      Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(), true);

      Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
                : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
  Value *WarpSizeCast =

  Value *ShuffleCall =

  return castValueToType(AllocaIP, ShuffleCall, CastTy);
  uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  Value *ElemPtr = DstAddr;
  Value *Ptr = SrcAddr;
  for (unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {

    Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(

        Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
    ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(

    if ((Size / IntSize) > 1) {
      Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
          SrcAddrGEP, Builder.getPtrTy());

          Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr, Builder.getPtrTy()));

          Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,

      Value *Res = createRuntimeShuffleFunction(

              IntType, Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),

      Builder.CreateAlignedStore(Res, ElemPtr,
                                 M.getDataLayout().getPrefTypeAlign(ElemType));

          Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
      Value *LocalElemPtr =
          Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});

      Value *Res = createRuntimeShuffleFunction(
          AllocaIP, Builder.CreateLoad(IntType, Ptr), IntType, Offset);

        Res = Builder.CreateTrunc(Res, ElemType);
      Builder.CreateStore(Res, ElemPtr);
      Ptr = Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});

          Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
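// Illustrative (not in the original source): the loop above moves an element
// in power-of-two chunks, halving from 8 bytes down to 1; e.g. a 7-byte
// payload is shuffled as one 4-byte, one 2-byte and one 1-byte transfer,
// using __kmpc_shuffle_int64 for chunks wider than 4 bytes and
// __kmpc_shuffle_int32 otherwise.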
Error OpenMPIRBuilder::emitReductionListCopy(

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;

  for (auto En : enumerate(ReductionInfos)) {

    Value *SrcElementAddr = nullptr;
    AllocaInst *DestAlloca = nullptr;
    Value *DestElementAddr = nullptr;
    Value *DestElementPtrAddr = nullptr;

    bool ShuffleInElement = false;

    bool UpdateDestListPtr = false;

        ReductionArrayTy, SrcBase,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);

    DestElementPtrAddr = Builder.CreateInBoundsGEP(
        ReductionArrayTy, DestBase,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});

    bool IsByRefElem = (!IsByRef.empty() && IsByRef[En.index()]);

      Type *DestAllocaType =
          IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
      DestAlloca = Builder.CreateAlloca(DestAllocaType, nullptr,
                                        ".omp.reduction.element");

          M.getDataLayout().getPrefTypeAlign(DestAllocaType));
      DestElementAddr = DestAlloca;

          DestElementAddr->getName() + ".ascast");

      ShuffleInElement = true;

      UpdateDestListPtr = true;
    if (ShuffleInElement) {
      Type *ShuffleType = RI.ElementType;
      Value *ShuffleSrcAddr = SrcElementAddr;
      Value *ShuffleDestAddr = DestElementAddr;
      AllocaInst *LocalStorage = nullptr;

        assert(RI.ByRefElementType && "Expected by-ref element type to be set");
        assert(RI.ByRefAllocatedType &&
               "Expected by-ref allocated type to be set");

        ShuffleType = RI.ByRefElementType;

            RI.DataPtrPtrGen(Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);

          return GenResult.takeError();

        ShuffleSrcAddr = Builder.CreateLoad(Builder.getPtrTy(), ShuffleSrcAddr);

        LocalStorage = Builder.CreateAlloca(ShuffleType);

        ShuffleDestAddr = LocalStorage;

      shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
                      RemoteLaneOffset, ReductionArrayTy, IsByRefElem);

            RI.DataPtrPtrGen(Builder.saveIP(),
                             Builder.CreatePointerBitCastOrAddrSpaceCast(
                                 DestAlloca, Builder.getPtrTy(), ".ascast"),

          return GenResult.takeError();

            LocalStorage, Builder.getPtrTy(), ".ascast"),
      switch (RI.EvaluationKind) {

        Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);

        Builder.CreateStore(Elem, DestElementAddr);

        Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
            RI.ElementType, SrcElementAddr, 0, 0, ".realp");

            RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");

            RI.ElementType, SrcElementAddr, 0, 1, ".imagp");

            RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");

        Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
            RI.ElementType, DestElementAddr, 0, 0, ".realp");
        Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
            RI.ElementType, DestElementAddr, 0, 1, ".imagp");
        Builder.CreateStore(SrcReal, DestRealPtr);
        Builder.CreateStore(SrcImg, DestImgPtr);

            M.getDataLayout().getTypeStoreSize(RI.ElementType));

            DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
            SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),

    if (UpdateDestListPtr) {
      Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
          DestElementAddr, Builder.getPtrTy(),
          DestElementAddr->getName() + ".ascast");
      Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(

  LLVMContext &Ctx = M.getContext();

      Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},

                       "_omp_reduction_inter_warp_copy_func", &M);

  Builder.SetInsertPoint(EntryBB);

  StringRef TransferMediumName =
      "__openmp_nvptx_data_transfer_temporary_storage";
  GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
  unsigned WarpSize = Config.getGridValue().GV_Warp_Size;

  if (!TransferMedium) {
    TransferMedium = new GlobalVariable(

  Value *GPUThreadID = getGPUThreadID();

  Value *LaneID = getNVPTXLaneID();

  Value *WarpID = getNVPTXWarpID();

                         Builder.GetInsertBlock()->getFirstInsertionPt());

  AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
      Arg0Type, nullptr, ReduceListArg->getName() + ".addr");
  AllocaInst *NumWarpsAlloca =
      Builder.CreateAlloca(Arg1Type, nullptr, NumWarpsArg->getName() + ".addr");
  Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListAlloca, Arg0Type, ReduceListAlloca->getName() + ".ascast");
  Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      NumWarpsAlloca, Builder.getPtrTy(0),
      NumWarpsAlloca->getName() + ".ascast");
  Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
  Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
  for (auto En : enumerate(ReductionInfos)) {

    bool IsByRefElem = !IsByRef.empty() && IsByRef[En.index()];
    unsigned RealTySize = M.getDataLayout().getTypeAllocSize(
        IsByRefElem ? RI.ByRefElementType : RI.ElementType);
    for (unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {

      unsigned NumIters = RealTySize / TySize;

      Value *Cnt = nullptr;
      Value *CntAddr = nullptr;

            Builder.CreateAlloca(Builder.getInt32Ty(), nullptr, ".cnt.addr");

        CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
                                              CntAddr->getName() + ".ascast");

            Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
        Builder.CreateCondBr(Cmp, BodyBB, ExitBB);

            omp::Directive::OMPD_unknown,

          return BarrierIP1.takeError();

      Value *IsWarpMaster = Builder.CreateIsNull(LaneID, "warp_master");
      Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);

      auto *RedListArrayTy =

          M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());

          Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
                                    {ConstantInt::get(IndexTy, 0),
                                     ConstantInt::get(IndexTy, En.index())});

            RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);

          return GenRes.takeError();

          ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});

      Builder.CreateStore(Elem, MediumPtr,

          omp::Directive::OMPD_unknown,

        return BarrierIP2.takeError();

      Value *NumWarpsVal =

      Value *IsActiveThread =
          Builder.CreateICmpULT(GPUThreadID, NumWarpsVal, "is_active_thread");
      Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);

          ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});

      Value *TargetElemPtrPtr =
          Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
                                    {ConstantInt::get(IndexTy, 0),
                                     ConstantInt::get(IndexTy, En.index())});
      Value *TargetElemPtrVal =

      Value *TargetElemPtr = TargetElemPtrVal;

            RI.DataPtrPtrGen(Builder.saveIP(), TargetElemPtr, TargetElemPtr);

          return GenRes.takeError();

        TargetElemPtr = Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtr);

      Value *SrcMediumValue =
          Builder.CreateLoad(CType, SrcMediumPtrVal, true);
      Builder.CreateStore(SrcMediumValue, TargetElemPtr);

            Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
        Builder.CreateStore(Cnt, CntAddr, false);

      auto *CurFn = Builder.GetInsertBlock()->getParent();

      RealTySize %= TySize;
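// Illustrative (not in the original source): this is the classic two-phase
// inter-warp copy. Lane 0 of every warp ("warp_master") stores its partial
// result into the shared transfer medium; after a barrier, the first NumWarps
// threads read the values back out, one 4/2/1-byte chunk per iteration for
// types wider than 32 bits.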
Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(

  LLVMContext &Ctx = M.getContext();
  FunctionType *FuncTy =

                        {Builder.getPtrTy(), Builder.getInt16Ty(),
                         Builder.getInt16Ty(), Builder.getInt16Ty()},

                       "_omp_reduction_shuffle_and_reduce_func", &M);

  Builder.SetInsertPoint(EntryBB);

  Type *ReduceListArgType = ReduceListArg->getType();

      ReduceListArgType, nullptr, ReduceListArg->getName() + ".addr");
  Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType, nullptr,
                                             LaneIDArg->getName() + ".addr");

      LaneIDArgType, nullptr, RemoteLaneOffsetArg->getName() + ".addr");
  Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType, nullptr,
                                              AlgoVerArg->getName() + ".addr");
      RedListArrayTy, nullptr, ".omp.reduction.remote_reduce_list");

  Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListAlloca, ReduceListArgType,
      ReduceListAlloca->getName() + ".ascast");
  Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->getName() + ".ascast");
  Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteLaneOffsetAlloca, LaneIDArgPtrType,
      RemoteLaneOffsetAlloca->getName() + ".ascast");
  Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->getName() + ".ascast");
  Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteReductionListAlloca, Builder.getPtrTy(),
      RemoteReductionListAlloca->getName() + ".ascast");

  Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
  Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
  Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
  Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);

  Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
  Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
  Value *RemoteLaneOffset =
      Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
  Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);

  Error EmitRedLsCpRes = emitReductionListCopy(

      ReduceList, RemoteListAddrCast, IsByRef,
      {RemoteLaneOffset, nullptr, nullptr});

    return EmitRedLsCpRes;
3492 return EmitRedLsCpRes;
3517 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3522 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3523 Value *RemoteOffsetComp =
3525 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3526 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3527 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
  Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);

  Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceList, Builder.getPtrTy());
  Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RemoteListAddrCast, Builder.getPtrTy());

      ->addFnAttr(Attribute::NoUnwind);

  Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
  Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);

  Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);

  EmitRedLsCpRes = emitReductionListCopy(

      RemoteListAddrCast, ReduceList, IsByRef);

    return EmitRedLsCpRes;
Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(

  LLVMContext &Ctx = M.getContext();

      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},

                       "_omp_reduction_list_to_global_copy_func", &M);

  Builder.SetInsertPoint(EntryBlock);

                                                 BufferArg->getName() + ".addr");

      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");

  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
  Value *LocalReduceList =

  Value *BufferArgVal =

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {

    auto *RedListArrayTy =

        RedListArrayTy, LocalReduceList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});

        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);

        ReductionsBufferTy, BufferVD, 0, En.index());

    switch (RI.EvaluationKind) {

      Value *TargetElement;

      if (IsByRef.empty() || !IsByRef[En.index()]) {
        TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);

            RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);

          return GenResult.takeError();

        TargetElement = Builder.CreateLoad(RI.ByRefElementType, ElemPtr);

      Builder.CreateStore(TargetElement, GlobVal);

      Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, ElemPtr, 0, 0, ".realp");

          RI.ElementType->getStructElementType(0), SrcRealPtr, ".real");

          RI.ElementType, ElemPtr, 0, 1, ".imagp");

          RI.ElementType->getStructElementType(1), SrcImgPtr, ".imag");

      Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobVal, 0, 0, ".realp");
      Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
          RI.ElementType, GlobVal, 0, 1, ".imagp");
      Builder.CreateStore(SrcReal, DestRealPtr);
      Builder.CreateStore(SrcImg, DestImgPtr);

          Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));

          GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
          M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal, false);
Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(

  LLVMContext &Ctx = M.getContext();

      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},

                       "_omp_reduction_list_to_global_reduce_func", &M);

  Builder.SetInsertPoint(EntryBlock);

                                                 BufferArg->getName() + ".addr");

      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  auto *RedListArrayTy =

  Value *LocalReduceList =
      Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list");

  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LocalReduceList, Builder.getPtrTy(),
      LocalReduceList->getName() + ".ascast");

  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {

    if (!IsByRef.empty() && IsByRef[En.index()]) {

      ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType);
      ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast(
          ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->getName() + ".ascast");

    Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, LocalReduceListAddrCast,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});

        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);

    Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
        ReductionsBufferTy, BufferVD, 0, En.index());

    if (!IsByRef.empty() && IsByRef[En.index()]) {
      Value *ByRefDataPtr;

          RI.DataPtrPtrGen(Builder.saveIP(), ByRefAlloc, ByRefDataPtr);

        return GenResult.takeError();

      Builder.CreateStore(GlobValPtr, ByRefDataPtr);
      Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);

      Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);

      ->addFnAttr(Attribute::NoUnwind);
Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(

  LLVMContext &Ctx = M.getContext();

      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},

                       "_omp_reduction_global_to_list_copy_func", &M);

  Builder.SetInsertPoint(EntryBlock);

                                                 BufferArg->getName() + ".addr");

      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");
  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
  Value *LocalReduceList =

      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    const OpenMPIRBuilder::ReductionInfo &RI = En.value();
    auto *RedListArrayTy =

        RedListArrayTy, LocalReduceList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});

        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
    Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
        ReductionsBufferTy, BufferVD, 0, En.index());

      if (!IsByRef.empty() && IsByRef[En.index()]) {

          return GenResult.takeError();

      Value *TargetElement = Builder.CreateLoad(ElemType, GlobValPtr);
      Builder.CreateStore(TargetElement, ElemPtr);

      Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(

      Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(

      Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(

      Builder.CreateStore(SrcReal, DestRealPtr);
      Builder.CreateStore(SrcImg, DestImgPtr);

          ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
          GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(

  LLVMContext &Ctx = M.getContext();

      {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},

                       "_omp_reduction_global_to_list_reduce_func", &M);

  Builder.SetInsertPoint(EntryBlock);

                                                 BufferArg->getName() + ".addr");

      Builder.getPtrTy(), nullptr, ReduceListArg->getName() + ".addr");

  Value *LocalReduceList =
      Builder.CreateAlloca(RedListArrayTy, nullptr, ".omp.reduction.red_list");

  Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      BufferArgAlloca, Builder.getPtrTy(),
      BufferArgAlloca->getName() + ".ascast");
  Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->getName() + ".ascast");
  Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReduceListArgAlloca, Builder.getPtrTy(),
      ReduceListArgAlloca->getName() + ".ascast");
  Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LocalReduceList, Builder.getPtrTy(),
      LocalReduceList->getName() + ".ascast");

  Builder.CreateStore(BufferArg, BufferArgAddrCast);
  Builder.CreateStore(IdxArg, IdxArgAddrCast);
  Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4000 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4001 for (
auto En :
enumerate(ReductionInfos)) {
4005 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4009 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4010 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4011 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
    Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
        RedListArrayTy, ReductionList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    // ...
    Value *BufferVD =
        Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
    Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
        ReductionsBufferTy, BufferVD, 0, En.index());
    if (!IsByRef.empty() && IsByRef[En.index()]) {
      Value *ByRefDataPtr;
      auto GenResult =
          RI.DataPtrPtrGen(Builder.saveIP(), ByRefAlloc, ByRefDataPtr);
      if (!GenResult)
        return GenResult.takeError();
      Builder.CreateStore(GlobValPtr, ByRefDataPtr);
      Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
    } else {
      Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
    }
  }
  // ...
      ->addFnAttr(Attribute::NoUnwind);
std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name) const {
  std::string Suffix =
      createPlatformSpecificName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}
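
// The emitted combiner has the shape
//   void <parent><suffix>(ptr %lhs_array, ptr %rhs_array)
// where both arguments point to arrays of pointers to the reduction
// variables; element i is combined as lhs[i] = op(lhs[i], rhs[i]). The shape
// shown here is illustrative; the authoritative suffix comes from
// getReductionFuncName above.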
Expected<Function *> OpenMPIRBuilder::createReductionFunction(
    StringRef ReducerName, ArrayRef<ReductionInfo> ReductionInfos,
    ArrayRef<bool> IsByRef, /* ... */
    AttributeList FuncAttrs) {
  auto *FuncTy = FunctionType::get(Builder.getVoidTy(),
                                   {Builder.getPtrTy(), Builder.getPtrTy()},
                                   /*isVarArg=*/false);
  std::string Name = getReductionFuncName(ReducerName);
  // ...
  Builder.SetInsertPoint(EntryBB);
  // ...
  Value *LHSArrayPtr = nullptr;
  Value *RHSArrayPtr = nullptr;
  // ...
  Value *LHSAlloca =
      Builder.CreateAlloca(Arg0Type, nullptr, Arg0->getName() + ".addr");
  Value *RHSAlloca =
      Builder.CreateAlloca(Arg1Type, nullptr, Arg1->getName() + ".addr");
  Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      LHSAlloca, Arg0Type, LHSAlloca->getName() + ".ascast");
  Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
      RHSAlloca, Arg1Type, RHSAlloca->getName() + ".ascast");
  Builder.CreateStore(Arg0, LHSAddrCast);
  Builder.CreateStore(Arg1, RHSAddrCast);
  LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
  RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
  Type *IndexTy = Builder.getIndexTy(
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  // ...
  for (auto En : enumerate(ReductionInfos)) {
    const ReductionInfo &RI = En.value();
    Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
        RedArrayTy, RHSArrayPtr,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    // ...
    Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        RHSI8Ptr, RI.PrivateVariable->getType(),
        RHSI8Ptr->getName() + ".ascast");
    Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
        RedArrayTy, LHSArrayPtr,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    // ...
    Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->getName() + ".ascast");
    if (!IsByRef.empty() && !IsByRef[En.index()]) {
      LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
      RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
    }
    // ...
    if (!AfterIP)
      return AfterIP.takeError();
    if (!Builder.GetInsertBlock())
      return ReductionFunc;
    // ...
    if (!IsByRef.empty() && !IsByRef[En.index()])
      Builder.CreateStore(Reduced, LHSPtr);
  }
  for (auto En : enumerate(ReductionInfos)) {
    unsigned Index = En.index();
    const ReductionInfo &RI = En.value();
    Value *LHSFixupPtr, *RHSFixupPtr;
    Builder.restoreIP(RI.ReductionGenClang(
        Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
    // ...
    LHSFixupPtr->replaceUsesWithIf(
        LHSPtrs[Index], [ReductionFunc](const Use &U) {
          return cast<Instruction>(U.getUser())->getParent()->getParent() ==
                 ReductionFunc;
        });
    RHSFixupPtr->replaceUsesWithIf(
        RHSPtrs[Index], [ReductionFunc](const Use &U) {
          return cast<Instruction>(U.getUser())->getParent()->getParent() ==
                 ReductionFunc;
        });
  }
  // ...
  return ReductionFunc;
  assert(RI.Variable && "expected non-null variable");
  assert(RI.PrivateVariable && "expected non-null private variable");
  assert((RI.ReductionGen || RI.ReductionGenClang) &&
         "expected non-null reduction generator callback");
  // ...
  assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
         "expected variables and their private equivalents to have the same "
         "type");
  assert(RI.Variable->getType()->isPointerTy() &&
         "expected variables to be pointers");
    unsigned ReductionBufNum, Value *SrcLocInfo) {
  // ...
  if (ReductionInfos.size() == 0)
    return Builder.saveIP();
  // ...
  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
  // ...
  AttributeList FuncAttrs;
  AttrBuilder AttrBldr(Ctx);
  for (auto Attr : CurFn->getAttributes().getFnAttrs())
    AttrBldr.addAttribute(Attr);
  AttrBldr.removeAttribute(Attribute::OptimizeNone);
  FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);

  Expected<Function *> ReductionResult = createReductionFunction(
      Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
      /* ... */ FuncAttrs);
  if (!ReductionResult)
    return ReductionResult.takeError();
  Function *ReductionFunc = *ReductionResult;
  // ...
  if (GridValue.has_value())
    Config.setGridValue(GridValue.value());
  // ...
  Type *FuncPtrTy =
      Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
  Value *ReductionListAlloca =
      Builder.CreateAlloca(RedArrayTy, nullptr, ".omp.reduction.red_list");
  Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionListAlloca, PtrTy, ReductionListAlloca->getName() + ".ascast");
  Type *IndexTy = Builder.getIndexTy(
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  for (auto En : enumerate(ReductionInfos)) {
    // ...
    Value *ElemPtr = Builder.CreateInBoundsGEP(
        RedArrayTy, ReductionList,
        {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
    // ...
    bool IsByRefElem = !IsByRef.empty() && IsByRef[En.index()];
    // ...
    Value *CastElem =
        Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
    Builder.CreateStore(CastElem, ElemPtr);
  }
  // ...
      ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
  // ...
  Expected<Function *> WcFunc =
      emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs, IsByRef);
  if (!WcFunc)
    return WcFunc.takeError();
  Value *RL =
      Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
  // ...
  unsigned MaxDataSize = 0;
  // ...
  for (auto En : enumerate(ReductionInfos)) {
    auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
    if (Size > MaxDataSize)
      MaxDataSize = Size;
    Type *RedTypeArg = (!IsByRef.empty() && IsByRef[En.index()])
                           ? En.value().ByRefElementType
                           : En.value().ElementType;
    ReductionTypeArgs.push_back(RedTypeArg);
  }
  Value *ReductionDataSize =
      Builder.getInt64(MaxDataSize * ReductionInfos.size());
  if (!IsTeamsReduction) {
    Value *SarFuncCast =
        Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
    Value *WcFuncCast =
        Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
    Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
                     WcFuncCast};
    Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
        RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
    Res = Builder.CreateCall(Pv2Ptr, Args);
  } else {
    // ...
    StructType *ReductionsBufferTy = StructType::create(
        Ctx, ReductionTypeArgs, "struct._globalized_locals_ty");
    Function *RedFixedBufferFn = getOrCreateRuntimeFunctionPtr(
        RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
    Expected<Function *> LtGCFunc = emitListToGlobalCopyFunction(
        ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
    if (!LtGCFunc)
      return LtGCFunc.takeError();
    Expected<Function *> LtGRFunc = emitListToGlobalReduceFunction(
        ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
    if (!LtGRFunc)
      return LtGRFunc.takeError();
    Expected<Function *> GtLCFunc = emitGlobalToListCopyFunction(
        ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
    if (!GtLCFunc)
      return GtLCFunc.takeError();
    Expected<Function *> GtLRFunc = emitGlobalToListReduceFunction(
        ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
    if (!GtLRFunc)
      return GtLRFunc.takeError();
    // ...
    Value *KernelTeamsReductionPtr = Builder.CreateCall(
        RedFixedBufferFn, {}, "_openmp_teams_reductions_buffer_$_$ptr");
    Value *Args3[] = {SrcLocInfo,
                      KernelTeamsReductionPtr,
                      Builder.getInt32(ReductionBufNum),
                      /* ... */};
    Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
        RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
    Res = Builder.CreateCall(TeamsReduceFn, Args3);
  }
  for (auto En : enumerate(ReductionInfos)) {
    // ...
    Value *LHSPtr, *RHSPtr;
    Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
                                           &LHSPtr, &RHSPtr, CurFunc));
    // ...
    if (IsByRef.empty() || !IsByRef[En.index()]) {
      Value *Reduced = Builder.CreateLoad(RI.ElementType, LHSPtr,
                                          "red.value." + Twine(En.index()));
      // ...
    }
    // ...
    if (!IsByRef.empty() && !IsByRef[En.index()]) {
      // ...
    }
  }
  if (ContinuationBlock) {
    Builder.CreateBr(ContinuationBlock);
    Builder.SetInsertPoint(ContinuationBlock);
  }
  Config.setEmitLLVMUsed();
4437 ".omp.reduction.func", &M);
4447 Builder.SetInsertPoint(ReductionFuncBlock);
4448 Value *LHSArrayPtr =
nullptr;
4449 Value *RHSArrayPtr =
nullptr;
4460 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4462 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4463 Value *LHSAddrCast =
4464 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4465 Value *RHSAddrCast =
4466 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4467 Builder.CreateStore(Arg0, LHSAddrCast);
4468 Builder.CreateStore(Arg1, RHSAddrCast);
4469 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4470 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4472 LHSArrayPtr = ReductionFunc->
getArg(0);
4473 RHSArrayPtr = ReductionFunc->
getArg(1);
4476 unsigned NumReductions = ReductionInfos.
size();
  for (auto En : enumerate(ReductionInfos)) {
    const ReductionInfo &RI = En.value();
    Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
        RedArrayTy, LHSArrayPtr, 0, En.index());
    Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
    Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        LHSI8Ptr, RI.Variable->getType());
    // ...
    Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
        RedArrayTy, RHSArrayPtr, 0, En.index());
    Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
    Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
        RHSI8Ptr, RI.PrivateVariable->getType());
    // ...
    Builder.restoreIP(*AfterIP);
    // ...
    if (!Builder.GetInsertBlock())
      return Builder.saveIP();
    // ...
    if (!IsByRef[En.index()])
      Builder.CreateStore(Reduced, LHSPtr);
  }
  Builder.CreateRetVoid();
    bool IsNoWait, bool IsTeamsReduction) {
  if (Config.isGPU())
    return createReductionsGPU(Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
                               IsByRef, IsNoWait, IsTeamsReduction);
  // ...
  if (ReductionInfos.size() == 0)
    return Builder.saveIP();
  // ...
  unsigned NumReductions = ReductionInfos.size();
  // ...
  Value *RedArray = Builder.CreateAlloca(RedArrayTy, nullptr, "red.array");

  Builder.SetInsertPoint(InsertBlock, InsertBlock->end());
  for (auto En : enumerate(ReductionInfos)) {
    unsigned Index = En.index();
    // ...
    Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
        RedArrayTy, RedArray, 0, Index, "red.array.elem." + Twine(Index));
    // ...
  }
  // ...
  Type *IndexTy = Builder.getIndexTy(
      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
  // ...
  Value *Ident = getOrCreateIdent(
      SrcLocStr, SrcLocStrSize,
      CanGenerateAtomic ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
                        : IdentFlag(0));
  // ...
  unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
  Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
  // ...
  Value *Lock = getOMPCriticalRegionLock(".reduction");
  Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_reduce);
  CallInst *ReduceCall =
      Builder.CreateCall(ReduceFunc,
                         {Ident, ThreadId, NumVariables, RedArraySize,
                          RedArray, ReductionFunc, Lock},
                         "reduce");
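  // __kmpc_reduce{_nowait} returns 1 when this thread should perform the
  // non-atomic reduction, 2 when it should take the atomic fallback, and 0
  // when it has nothing to do, hence the three-way switch below.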
  SwitchInst *Switch =
      Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
  Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
  Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
  Builder.SetInsertPoint(NonAtomicRedBlock);
  for (auto En : enumerate(ReductionInfos)) {
    // ...
    if (!IsByRef[En.index()]) {
      RedValue = Builder.CreateLoad(RI.ElementType, RI.Variable,
                                    "red.value." + Twine(En.index()));
      Value *PrivateRedValue =
          Builder.CreateLoad(RI.ElementType, RI.PrivateVariable,
                             "red.private.value." + Twine(En.index()));
      // ...
    }
    // ...
    if (!Builder.GetInsertBlock())
      return Builder.saveIP();
    if (!IsByRef[En.index()])
      Builder.CreateStore(Reduced, RI.Variable);
  }
  Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
      IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
               : RuntimeFunction::OMPRTL___kmpc_end_reduce);
  // ...
  Builder.CreateBr(ContinuationBlock);
  Builder.SetInsertPoint(AtomicRedBlock);
  if (CanGenerateAtomic &&
      llvm::none_of(IsByRef, [](bool P) { return P; })) {
    // ...
    if (!Builder.GetInsertBlock())
      return Builder.saveIP();
    // ...
    Builder.CreateBr(ContinuationBlock);
  }
  // ...
  if (!Builder.GetInsertBlock())
    return Builder.saveIP();
  Builder.SetInsertPoint(ContinuationBlock);
  Directive OMPD = Directive::OMPD_master;
  // ...
  Value *Args[] = {Ident, ThreadId};
  // ...
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional=*/true, /*HasFinalize=*/true);
}

// ...
  Directive OMPD = Directive::OMPD_masked;
  // ...
  Value *ArgsEnd[] = {Ident, ThreadId};
  // ...
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional=*/true, /*HasFinalize=*/true);
}

// ...
  Call->setDoesNotThrow();
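
// Scan-based directives are lowered in two passes over the same canonical
// loop: an input loop that stores each iteration's scan variables into a
// runtime-allocated buffer, and a scan loop that reads the prefix-combined
// values back. The buffer is combined in between by the logarithmic
// prefix-reduction emitted further down in this file.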
    bool IsInclusive, ScanInfo *ScanRedInfo) {
  llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
                                                  ScanVarsType, ScanRedInfo);
  if (Err)
    return Err;
  // ...
  for (size_t i = 0; i < ScanVars.size(); i++) {
    // ...
    Type *DestTy = ScanVarsType[i];
    Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
    // ...
    Builder.CreateStore(Src, Val);
  }
  // ...
  emitBlock(/* ... */, Builder.GetInsertBlock()->getParent());
  // ...
  IV = ScanRedInfo->IV;
  // ...
  for (size_t i = 0; i < ScanVars.size(); i++) {
    // ...
    Type *DestTy = ScanVarsType[i];
    Value *SrcPtr =
        Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
    // ...
    Builder.CreateStore(Src, ScanVars[i]);
  }
  // ...
  emitBlock(/* ... */, Builder.GetInsertBlock()->getParent());
Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
    InsertPointTy AllocaIP, ArrayRef<Value *> ScanVars,
    ArrayRef<Type *> ScanVarsType, ScanInfo *ScanRedInfo) {
  Builder.restoreIP(AllocaIP);
  // ...
  for (size_t i = 0; i < ScanVars.size(); i++) {
    // ...
    Builder.CreateAlloca(Builder.getPtrTy(), nullptr, "vla");
    // ...
  }
  // ...
  Builder.restoreIP(CodeGenIP);
  Value *AllocSpan =
      Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
  for (size_t i = 0; i < ScanVars.size(); i++) {
    // ...
    Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
                                       AllocSpan, nullptr, "arr");
    Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
  }
  // ...
  Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
    ArrayRef<ReductionInfo> ReductionInfos, ScanInfo *ScanRedInfo) {
  // ...
  Value *PrivateVar = RedInfo.PrivateVariable;
  Value *OrigVar = RedInfo.Variable;
  // ...
  Type *SrcTy = RedInfo.ElementType;
  // ...
  Builder.CreateStore(Src, OrigVar);
  // ...
  Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
  // ...
      Builder.GetInsertBlock()->getModule(), /* ... */);
  // ...
      Builder.GetInsertBlock()->getModule(), /* ... */);
  // ...
      llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
  Builder.SetInsertPoint(InputBB);
  // ...
  Builder.SetInsertPoint(LoopBB);
  // ...
  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
  Builder.SetInsertPoint(InnerLoopBB);
  // ...
  Value *ReductionVal = RedInfo.PrivateVariable;
  // ...
  Type *DestTy = RedInfo.ElementType;
  Value *LHSPtr =
      Builder.CreateInBoundsGEP(DestTy, Buff, IV, "arrayOffset");
  Value *RHSPtr =
      Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval, "arrayOffset");
  // ...
      RedInfo.ReductionGen(Builder.saveIP(), LHS, RHS, Result);
  // ...
  Builder.CreateStore(Result, LHSPtr);
  Value *NextIVal = Builder.CreateAdd(
      IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
  // ...
  CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
  Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
  // ...
  Value *NextCounter = Builder.CreateAdd(
      Counter, llvm::ConstantInt::get(Counter->getType(), 1));
  // ...
  Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
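  // The two nested loops above implement the classic O(n log n) inclusive
  // prefix reduction: for each k in 0 .. ceil(log2(n))-1, every index
  // i >= 2^k combines
  //   buf[i] = op(buf[i], buf[i - 2^k])
  // so that after the outer loop buf[i] holds the reduction of elements 0..i.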
  Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
  if (Err)
    return Err;
  // ...
}

Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
    /* ... */) {
  // ...
  {
    Error Err = InputLoopGen();
    if (Err)
      return Err;
  }
  // ...
  {
    Error Err = ScanLoopGen(Builder.saveIP());
    if (Err)
      return Err;
  }
  // ...
}
void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
  // ...
}

// ... (canonical loop skeleton)
  Builder.SetInsertPoint(Preheader);
  // ...
  Builder.SetInsertPoint(Header);
  PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2, "omp_" + Name + ".iv");
  IndVarPHI->addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
  // ...
  Value *Cmp =
      Builder.CreateICmpULT(IndVarPHI, TripCount, "omp_" + Name + ".cmp");
  Builder.CreateCondBr(Cmp, Body, Exit);
  // ...
  Builder.SetInsertPoint(Latch);
  Value *Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
                                  "omp_" + Name + ".next", /*HasNUW=*/true);
  // ...
  CL->Header = Header;
  // ...
  CanonicalLoopInfo *CL =
      createLoopSkeleton(DL, TripCount, F, NextBB, NextBB, Name);
    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
    // ...
  Value *TripCount = calculateCanonicalLoopTripCount(
      ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
  ScanRedInfo->Span = TripCount;
  // ...
  ScanRedInfo->IV = IV;
  createScanBBs(ScanRedInfo);
  // ...
  assert(Terminator->getNumSuccessors() == 1);
  BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
  // ...
  emitBlock(/* ... */, Builder.GetInsertBlock()->getParent());
  // ...
  emitBlock(/* ... */, Builder.GetInsertBlock()->getParent());
  Builder.CreateBr(ContinueBlock);
  // ...
  const auto &&InputLoopGen = [&]() -> Error {
    auto LoopInfo = createCanonicalLoop(
        Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
        ComputeIP, Name, true, ScanRedInfo);
    if (!LoopInfo)
      return LoopInfo.takeError();
    Builder.restoreIP((*LoopInfo)->getAfterIP());
    return Error::success();
  };
  const auto &&ScanLoopGen = [&](LocationDescription Loc) -> Error {
    auto LoopInfo =
        createCanonicalLoop(Loc, BodyGen, Start, Stop, Step, IsSigned,
                            InclusiveStop, ComputeIP, Name, true, ScanRedInfo);
    if (!LoopInfo)
      return LoopInfo.takeError();
    Builder.restoreIP((*LoopInfo)->getAfterIP());
    return Error::success();
  };
  Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
  if (Err)
    return Err;
    bool IsSigned, bool InclusiveStop, const Twine &Name) {
  // ...
  assert(IndVarTy == Stop->getType() && "Stop type mismatch");
  assert(IndVarTy == Step->getType() && "Step type mismatch");
  // ...
  ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
  // ...
  if (IsSigned) {
    // ...
    Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
    Span = Builder.CreateSub(UB, LB, "", false, true);
    // ...
  } else {
    Span = Builder.CreateSub(Stop, Start, "", true);
    // ...
  }
  Value *CountIfLooping;
  if (InclusiveStop) {
    CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
  } else {
    // ...
    CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
  }
  return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
                              "omp_" + Name + ".tripcount");
}
    Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop,
    // ...
  Value *TripCount = calculateCanonicalLoopTripCount(
      ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
  // ...
    ScanRedInfo->IV = IndVar;
    return BodyGenCB(Builder.saveIP(), IndVar);
  // ...
      Builder.getCurrentDebugLocation());
// ...
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

// ...
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
         "Require dedicated allocate IP");
  // ...
  uint32_t SrcLocStrSize;
  // ...
  Type *IVTy = IV->getType();
  FunctionCallee StaticInit =
      LoopType == WorksharingLoopType::DistributeForStaticLoop
          ? getKmpcDistForStaticInitForType(IVTy, M, *this)
          : getKmpcForStaticInitForType(IVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
  // ...
  Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
  // ...
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
  // ...
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(Zero, PLowerBound);
  // ...
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);
  // ...
  OMPScheduleType SchedType =
      (LoopType == WorksharingLoopType::DistributeStaticLoop)
          ? OMPScheduleType::OrderedDistribute
          : /* ... */;
  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(SchedType));
  // ...
  auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
                        PUpperBound, IVTy, PStride, One, Zero, StaticInit,
                        this](Value *SchedulingType, IRBuilderBase &Builder) {
    SmallVector<Value *> Args({SrcLoc, ThreadNum, SchedulingType, PLastIter,
                               PLowerBound, PUpperBound});
    if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
      Value *PDistUpperBound =
          Builder.CreateAlloca(IVTy, nullptr, "p.distupperbound");
      Args.push_back(PDistUpperBound);
    }
    // ...
  };
  BuildInitCall(SchedulingType, Builder);
  if (HasDistSchedule &&
      LoopType != WorksharingLoopType::DistributeStaticLoop) {
    Constant *DistScheduleSchedType = ConstantInt::get(
        I32Type, /* ... */);
    // ...
    BuildInitCall(DistScheduleSchedType, Builder);
  }
  Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
  Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
  Value *TripCountMinusOne =
      Builder.CreateSub(InclusiveUpperBound, LowerBound);
  Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
  CLI->setTripCount(TripCount);
  // ...
  CLI->mapIndVar([&](Instruction *OldIV) -> Value * {
    // ...
    return Builder.CreateAdd(OldIV, LowerBound);
  });
  // ...
  InsertPointOrErrorTy BarrierIP =
      createBarrier(LocationDescription(Builder.saveIP(), DL),
                    omp::Directive::OMPD_for, /*ForceSimpleCall=*/false,
                    /*CheckCancelFlag=*/false);
  if (!BarrierIP)
    return BarrierIP.takeError();
  // ...
    Reachable.insert(Block);
  // ...
  LoopMDList.push_back(MDNode::get(
      Ctx, {MDString::get(Ctx, "llvm.loop.parallel_accesses"), AccessGroup}));
OpenMPIRBuilder::InsertPointOrErrorTy
OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
    /* ... */) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert((ChunkSize || DistScheduleChunkSize) && "Chunk size is required");
  // ...
  Type *IVTy = IV->getType();
  assert(IVTy->getIntegerBitWidth() <= 64 &&
         "Max supported tripcount bitwidth is 64 bits");
  Type *InternalIVTy = IVTy->getIntegerBitWidth() <= 32
                           ? Type::getInt32Ty(Ctx)
                           : Type::getInt64Ty(Ctx);
  // ...
  Constant *One = ConstantInt::get(InternalIVTy, 1);
  // ...
  LoopAnalysis LIA;
  LoopInfo &&LI = LIA.run(*F, FAM);
  // ...
  if (ChunkSize || DistScheduleChunkSize)
    // ...
  FunctionCallee StaticInit =
      getKmpcForStaticInitForType(InternalIVTy, M, *this);
  FunctionCallee StaticFini =
      getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
  // ...
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound =
      Builder.CreateAlloca(InternalIVTy, nullptr, "p.lowerbound");
  Value *PUpperBound =
      Builder.CreateAlloca(InternalIVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(InternalIVTy, nullptr, "p.stride");
  // ...
  Value *CastedChunkSize = Builder.CreateZExtOrTrunc(
      ChunkSize ? ChunkSize : Zero, InternalIVTy, "chunksize");
  Value *CastedDistScheduleChunkSize = Builder.CreateZExtOrTrunc(
      DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
      "distschedulechunksize");
  Value *CastedTripCount =
      Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount");
  // ...
  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(SchedType));
  Constant *DistSchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(DistScheduleSchedType));
  Builder.CreateStore(Zero, PLowerBound);
  Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
  Value *IsTripCountZero = Builder.CreateICmpEQ(CastedTripCount, Zero);
  Value *UpperBound =
      Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);
  // ...
  uint32_t SrcLocStrSize;
  // ...
  auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
                        PUpperBound, PStride, One,
                        this](Value *SchedulingType, Value *ChunkSize,
                              IRBuilderBase &Builder) {
    Builder.CreateCall(StaticInit, {SrcLoc, ThreadNum,
                                    SchedulingType, PLastIter,
                                    PLowerBound, PUpperBound,
                                    /* ... */});
  };
  BuildInitCall(SchedulingType, CastedChunkSize, Builder);
  if (DistScheduleSchedType != OMPScheduleType::None &&
      SchedType != OMPScheduleType::OrderedDistributeChunked &&
      SchedType != OMPScheduleType::OrderedDistribute) {
    // ...
    BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize, Builder);
  }
  // ...
  Value *FirstChunkStart =
      Builder.CreateLoad(InternalIVTy, PLowerBound, "omp_firstchunk.lb");
  Value *FirstChunkStop =
      Builder.CreateLoad(InternalIVTy, PUpperBound, "omp_firstchunk.ub");
  Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
  Value *ChunkRange =
      Builder.CreateSub(FirstChunkEnd, FirstChunkStart, "omp_chunk.range");
  Value *NextChunkStride =
      Builder.CreateLoad(InternalIVTy, PStride, "omp_dispatch.stride");
  // ...
  Value *DispatchCounter;
  // ...
        DispatchCounter = Counter;
  // ...
      FirstChunkStart, CastedTripCount, NextChunkStride,
  // ...
  Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
  Value *IsLastChunk =
      Builder.CreateICmpUGE(ChunkEnd, CastedTripCount, "omp_chunk.is_last");
  Value *CountUntilOrigTripCount =
      Builder.CreateSub(CastedTripCount, DispatchCounter);
  Value *ChunkTripCount = Builder.CreateSelect(
      IsLastChunk, CountUntilOrigTripCount, ChunkRange, "omp_chunk.tripcount");
  Value *BackcastedChunkTC =
      Builder.CreateTrunc(ChunkTripCount, IVTy, "omp_chunk.tripcount.trunc");
  CLI->setTripCount(BackcastedChunkTC);
  // ...
  Value *BackcastedDispatchCounter =
      Builder.CreateTrunc(DispatchCounter, IVTy, "omp_dispatch.iv.trunc");
  CLI->mapIndVar([&](Instruction *) -> Value * {
    // ...
    return Builder.CreateAdd(IV, BackcastedDispatchCounter);
  });
  // ...
  if (!AfterIP)
    return AfterIP.takeError();
static FunctionCallee
getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder,
                            WorksharingLoopType LoopType) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  Module &M = OMPBuilder->M;
  switch (LoopType) {
  case WorksharingLoopType::ForStaticLoop:
    if (Bitwidth == 32)
      return OMPBuilder->getOrCreateRuntimeFunction(
          M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
    if (Bitwidth == 64)
      return OMPBuilder->getOrCreateRuntimeFunction(
          M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
    break;
  case WorksharingLoopType::DistributeStaticLoop:
    if (Bitwidth == 32)
      return OMPBuilder->getOrCreateRuntimeFunction(
          M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
    if (Bitwidth == 64)
      return OMPBuilder->getOrCreateRuntimeFunction(
          M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
    break;
  case WorksharingLoopType::DistributeForStaticLoop:
    if (Bitwidth == 32)
      return OMPBuilder->getOrCreateRuntimeFunction(
          M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
    if (Bitwidth == 64)
      return OMPBuilder->getOrCreateRuntimeFunction(
          M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
    break;
  }
  if (Bitwidth != 32 && Bitwidth != 64) {
    llvm_unreachable("Unknown OpenMP loop iterator bitwidth");
  }
  llvm_unreachable("Unknown type of OpenMP worksharing loop");
}
    Function &LoopBodyFn, bool NoLoop) {
  // ...
  if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
    RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
    RealArgs.push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
    Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
    // ...
    return;
  }
  FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
      M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
  Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
  Value *NumThreads = Builder.CreateCall(RTLNumThreads, {});

  RealArgs.push_back(
      Builder.CreateZExtOrTrunc(NumThreads, TripCountTy, "num.threads.cast"));
  RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
  if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
    RealArgs.push_back(ConstantInt::get(TripCountTy, 0));
    RealArgs.push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
  } else {
    RealArgs.push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
  }
  // ...
  Builder.restoreIP({Preheader, Preheader->end()});
  // ...
  Builder.CreateBr(CLI->getExit());
  // ...
  CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
  // ...
  assert(OutlinedFn->getNumUses() == 1 &&
         "Expected unique undroppable user of outlined function");
  // ...
  assert(OutlinedFnCallInstruction && "Expected outlined function call");
  assert((OutlinedFnCallInstruction->getParent() == Preheader) &&
         "Expected outlined function call to be located in loop preheader");
  // ...
  if (OutlinedFnCallInstruction->arg_size() > 1) {
    // ...
  }
  // ...
      LoopBodyArg, TripCount, OutlinedFn, NoLoop);
  // ...
  for (auto &ToBeDeletedItem : ToBeDeleted)
    ToBeDeletedItem->eraseFromParent();
  uint32_t SrcLocStrSize;
  // ...
  SmallVector<Instruction *, 4> ToBeDeleted;
  // ...
  OI.OuterAllocaBB = AllocaIP.getBlock();
  // ...
      "omp.prelatch", /*Before=*/true);
  // ...
  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  // ...
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);
  // ...
  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(Blocks,
                          /* ... */);
  // ...
  SetVector<Value *> SinkingCands, HoistingCands;
  // ...
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  // ...
  for (auto Use : Users) {
    // ...
    if (ParallelRegionBlockSet.count(Inst->getParent())) {
      Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
    }
  }
  // ...
  OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
  // ...
  OI.PostOutlineCB = [=, ToBeDeletedVec =
                             std::move(ToBeDeleted)](Function &OutlinedFn) {
    // ...
  };
    bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
    bool HasSimdModifier, bool HasMonotonicModifier,
    bool HasNonmonotonicModifier, bool HasOrderedClause,
    // ...
    Value *DistScheduleChunkSize) {
  if (Config.isTargetDevice())
    return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType, NoLoop);
  OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
      SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
      HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);

  bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
                   OMPScheduleType::ModifierOrdered;
  // ...
  if (HasDistSchedule) {
    DistScheduleSchedType = DistScheduleChunkSize
                                ? OMPScheduleType::OrderedDistributeChunked
                                : OMPScheduleType::OrderedDistribute;
  }
  switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
  case OMPScheduleType::BaseStatic:
  case OMPScheduleType::BaseDistribute:
    assert((!ChunkSize || !DistScheduleChunkSize) &&
           "No chunk size with static-chunked schedule");
    if (IsOrdered && !HasDistSchedule)
      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                       NeedsBarrier, ChunkSize);
    // ...
    if (DistScheduleChunkSize)
      return applyStaticChunkedWorkshareLoop(
          DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
          DistScheduleChunkSize, DistScheduleSchedType);
    return applyStaticWorkshareLoop(DL, CLI, AllocaIP, LoopType, NeedsBarrier,
                                    /* ... */);

  case OMPScheduleType::BaseStaticChunked:
  case OMPScheduleType::BaseDistributeChunked:
    if (IsOrdered && !HasDistSchedule)
      return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                       NeedsBarrier, ChunkSize);
    // ...
    return applyStaticChunkedWorkshareLoop(
        DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
        DistScheduleChunkSize, DistScheduleSchedType);

  case OMPScheduleType::BaseRuntime:
  case OMPScheduleType::BaseAuto:
  case OMPScheduleType::BaseGreedy:
  case OMPScheduleType::BaseBalanced:
  case OMPScheduleType::BaseSteal:
  case OMPScheduleType::BaseGuidedSimd:
  case OMPScheduleType::BaseRuntimeSimd:
    assert(!ChunkSize &&
           "schedule type does not support user-defined chunk sizes");
    [[fallthrough]];
  case OMPScheduleType::BaseDynamicChunked:
  case OMPScheduleType::BaseGuidedChunked:
  case OMPScheduleType::BaseGuidedIterativeChunked:
  case OMPScheduleType::BaseGuidedAnalyticalChunked:
  case OMPScheduleType::BaseStaticBalancedChunked:
    return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
                                     NeedsBarrier, ChunkSize);
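
// The helpers below resolve the bitwidth-specific __kmpc_dispatch_* entry
// points used by the dynamic path chosen in applyWorkshareLoop above.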
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

static FunctionCallee
getKmpcDispatchNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}

static FunctionCallee
getKmpcDispatchFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder) {
  unsigned Bitwidth = Ty->getIntegerBitWidth();
  if (Bitwidth == 32)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
  if (Bitwidth == 64)
    return OMPBuilder.getOrCreateRuntimeFunction(
        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
  llvm_unreachable("unknown OpenMP loop iterator bitwidth");
}
OpenMPIRBuilder::InsertPointOrErrorTy
OpenMPIRBuilder::applyDynamicWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI,
                                           InsertPointTy AllocaIP,
                                           OMPScheduleType SchedType,
                                           bool NeedsBarrier, Value *Chunk) {
  assert(CLI->isValid() && "Requires a valid canonical loop");
  assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
         "Require dedicated allocate IP");
  assert(isValidWorkshareLoopScheduleType(SchedType) &&
         "Require valid schedule type");

  bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
                 OMPScheduleType::ModifierOrdered;
  // ...
  uint32_t SrcLocStrSize;
  // ...
  Type *IVTy = IV->getType();
  // ...
  Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
  // ...
  Value *PLastIter = Builder.CreateAlloca(I32Type, nullptr, "p.lastiter");
  Value *PLowerBound = Builder.CreateAlloca(IVTy, nullptr, "p.lowerbound");
  Value *PUpperBound = Builder.CreateAlloca(IVTy, nullptr, "p.upperbound");
  Value *PStride = Builder.CreateAlloca(IVTy, nullptr, "p.stride");
  // ...
  Constant *One = ConstantInt::get(IVTy, 1);
  Builder.CreateStore(One, PLowerBound);
  // ...
  Builder.CreateStore(UpperBound, PUpperBound);
  Builder.CreateStore(One, PStride);
  // ...
  Constant *SchedulingType =
      ConstantInt::get(I32Type, static_cast<int>(SchedType));
  // ...
  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
  Value *Res = Builder.CreateCall(
      DynamicNext,
      {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
  Constant *Zero32 = ConstantInt::get(I32Type, 0);
  // ...
  Value *LowerBound =
      Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One, "lb");
  Builder.CreateCondBr(MoreWork, Header, Exit);
  // ...
  PI->setIncomingBlock(0, OuterCond);
  PI->setIncomingValue(0, LowerBound);
  // ...
  Br->setSuccessor(0, OuterCond);
  // ...
  UpperBound = Builder.CreateLoad(IVTy, PUpperBound, "ub");
  // ...
  CI->setOperand(1, UpperBound);
  // ...
  assert(BI->getSuccessor(1) == Exit);
  BI->setSuccessor(1, OuterCond);
  // ...
  InsertPointOrErrorTy BarrierIP =
      createBarrier(LocationDescription(Builder.saveIP(), DL),
                    omp::Directive::OMPD_for, /*ForceSimpleCall=*/false,
                    /*CheckCancelFlag=*/false);
  if (!BarrierIP)
    return BarrierIP.takeError();
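  // The emitted control flow follows the usual dynamic-dispatch protocol,
  // roughly (illustrative C, 32-bit unsigned IV case):
  //   __kmpc_dispatch_init_4u(loc, tid, sched, lb, ub, stride, chunk);
  //   while (__kmpc_dispatch_next_4u(loc, tid, &last, &lb, &ub, &stride))
  //     for (iv = lb; iv <= ub; ++iv) body(iv);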
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (!UseInst)
        continue;
      if (BBsToErase.count(UseInst->getParent()))
        continue;
      return true;
    }
    return false;
  };

  while (BBsToErase.remove_if(HasRemainingUses)) {
    // Try again if anything was removed.
  }
  assert(Loops.size() >= 1 && "At least one loop required");
  size_t NumLoops = Loops.size();
  if (NumLoops == 1)
    return Loops.front();
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  // ...
  if (ComputeIP.isSet())
    Builder.restoreIP(ComputeIP);
  // ...
  Value *CollapsedTripCount = nullptr;
  for (CanonicalLoopInfo *L : Loops) {
    assert(L->isValid() &&
           "All loops to collapse must be valid canonical loops");
    Value *OrigTripCount = L->getTripCount();
    if (!CollapsedTripCount) {
      CollapsedTripCount = OrigTripCount;
      continue;
    }
    CollapsedTripCount =
        Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
  }
  // ...
  CanonicalLoopInfo *Result =
      createLoopSkeleton(DL, CollapsedTripCount, F,
                         OrigPreheader->getNextNode(), OrigAfter, "collapsed");
  // ...
  Builder.restoreIP(Result->getBodyIP());
  Value *Leftover = Result->getIndVar();
  SmallVector<Value *> NewIndVars;
  NewIndVars.resize(NumLoops);
  for (int i = NumLoops - 1; i >= 1; --i) {
    Value *OrigTripCount = Loops[i]->getTripCount();
    Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
    NewIndVars[i] = NewIndVar;
    Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
  }
  NewIndVars[0] = Leftover;
  // ...
  BasicBlock *ContinueBlock = Result->getBody();
  // ...
  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
                                                          BasicBlock *NextSrc) {
    // ...
    ContinueBlock = nullptr;
    ContinuePred = NextSrc;
  };
  // ...
  for (size_t i = 0; i < NumLoops - 1; ++i)
    ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());
  // ...
  for (size_t i = NumLoops - 1; i > 0; --i)
    ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());
  // ...
  ContinueWith(Result->getLatch(), nullptr);
  // ...
  for (size_t i = 0; i < NumLoops; ++i)
    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
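  // Index reconstruction example for two collapsed loops of trip counts M and
  // N: the collapsed IV cv in [0, M*N) is decomposed as
  //   i = cv / N;  j = cv % N;
  // which is exactly the URem/UDiv chain built above, generalized from the
  // innermost loop outwards.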
std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                           ArrayRef<Value *> TileSizes) {
  assert(TileSizes.size() == Loops.size() &&
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");
  // ...
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  // ...
  for (CanonicalLoopInfo *L : Loops) {
    assert(L->isValid() && "All input loops must be valid canonical loops");
    OrigTripCounts.push_back(L->getTripCount());
  }
  // ...
  for (int i = 0; i < NumLoops - 1; ++i) {
    // ...
  }
  // ...
  for (int i = 0; i < NumLoops; ++i) {
    // ...
    Value *OrigTripCount = OrigTripCounts[i];
    // ...
    Value *FloorTripOverflow =
        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
    Value *FloorTripCount =
        Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
                          "omp_floor" + Twine(i) + ".tripcount", true);
    // ...
    FloorCompleteCount.push_back(FloorCompleteTripCount);
  }
  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);
  // ...
  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
    // ...
    Enter = EmbeddedLoop->getBody();
    // ...
    OutroInsertBefore = EmbeddedLoop->getLatch();
    return EmbeddedLoop;
  };

  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
                                                  const Twine &NameBase) {
    for (auto P : enumerate(TripCounts)) {
      CanonicalLoopInfo *EmbeddedLoop =
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);
    }
  };

  EmbeddNewLoops(FloorCount, "floor");
  for (int i = 0; i < NumLoops; ++i) {
    // ...
    Value *FloorIsEpilogue =
        /* ... */;
    Value *TileTripCount =
        /* ... */;
    // ...
  }
  // ...
  EmbeddNewLoops(TileCounts, "tile");
  // ...
  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    // ...
    BodyEnter = nullptr;
    BodyEntered = ExitBB;
  }
  // ...
  Builder.restoreIP(Result.back()->getBodyIP());
  for (int i = 0; i < NumLoops; ++i) {
    // ...
    Value *OrigIndVar = OrigIndVars[i];
    // ...
  }
  if (Properties.empty())
    return;
  // ...
  assert(Loop->isValid() && "Expecting a valid CanonicalLoopInfo");
  // ...
  assert(Latch && "A valid CanonicalLoopInfo must have a unique latch");
  // ...
  if (I.mayReadOrWriteMemory()) {
    // ...
    I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
  }
    const Twine &NamePrefix) {
  // ...
  BasicBlock *ThenBlock = BasicBlock::Create(
      C, NamePrefix + ".if.then", Cond->getParent(), Cond->getNextNode());
  BasicBlock *ElseBlock = BasicBlock::Create(
      C, NamePrefix + ".if.else", Cond->getParent(), CanonicalLoop->getExit());
  // ...
  Builder.SetInsertPoint(SplitBeforeIt);
  Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
  // ...
  spliceBB(IP, ThenBlock, false, Builder.getCurrentDebugLocation());
  // ...
  Builder.SetInsertPoint(ElseBlock);
  // ...
  ExistingBlocks.reserve(L->getNumBlocks() + 1);
  // ...
  ExistingBlocks.append(L->block_begin(), L->block_end());
  // ...
  assert(LoopCond && LoopHeader && "Invalid loop structure");
  // ...
  if (Block == L->getLoopPreheader() || Block == L->getLoopLatch() ||
      /* ... */)
    continue;
  // ...
  if (Block == ThenBlock)
    NewBB->setName(NamePrefix + ".if.else");
  // ...
  VMap[Block] = NewBB;
  // ...
  L->getLoopLatch()->splitBasicBlock(
      L->getLoopLatch()->begin(), NamePrefix + ".pre_latch", /*Before=*/true);
  // ...
  L->addBasicBlockToLoop(ThenBlock, LI);
  if (TargetTriple.isX86()) {
    if (Features.lookup("avx512f"))
      return 512;
    else if (Features.lookup("avx"))
      return 256;
    return 128;
  }
  if (TargetTriple.isPPC())
    return 128;
  if (TargetTriple.isWasm())
    return 128;
    Value *IfCond, OrderKind Order,
    // ...
  if (AlignedVars.size()) {
    // ...
    for (auto &AlignedItem : AlignedVars) {
      Value *AlignedPtr = AlignedItem.first;
      Value *Alignment = AlignedItem.second;
      // ...
      Builder.CreateAlignmentAssumption(F->getDataLayout(), AlignedPtr,
                                        Alignment);
    }
    // ...
  }
  // ...
  createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L, "simd");
  // ...
    Reachable.insert(Block);
  // ...
  if ((Safelen == nullptr) || (Order == OrderKind::OMP_ORDER_concurrent)) {
    // ...
    LoopMDList.push_back(MDNode::get(
        Ctx, {MDString::get(Ctx, "llvm.loop.vectorize.enable"), BoolConst}));
  }
  if (Simdlen || Safelen) {
    // ...
    ConstantInt *VectorizeWidth = Simdlen == nullptr ? Safelen : Simdlen;
    // ...
  }
static std::unique_ptr<TargetMachine>
createTargetMachine(Function *F, CodeGenOptLevel OptLevel) {
  // ...
  StringRef CPU = F->getFnAttribute("target-cpu").getValueAsString();
  StringRef Features = F->getFnAttribute("target-features").getValueAsString();
  // ...
  return std::unique_ptr<TargetMachine>(TheTarget->createTargetMachine(
      /* ... */ std::nullopt, OptLevel));
}

// ...
  TargetIRAnalysis TIRA = TargetIRAnalysis(
      [&](const Function &F) { return TM->getTargetTransformInfo(F); });
  FAM.registerPass([&]() { return TIRA; });
  assert(L && "Expecting CanonicalLoopInfo to be recognized as a loop");
  // ...
      nullptr, ORE, static_cast<int>(OptLevel),
  // ...
  LLVM_DEBUG(dbgs() << "  Threshold=" << UP.Threshold << "\n"
                    << "  PartialOptSizeThreshold="
                    << UP.PartialOptSizeThreshold << "\n");
  // ...
  if (auto *Load = dyn_cast<LoadInst>(&I))
    Ptr = Load->getPointerOperand();
  else if (auto *Store = dyn_cast<StoreInst>(&I))
    Ptr = Store->getPointerOperand();
  // ...
  if (Alloca->getParent() == &F->getEntryBlock())
    continue;
  // ...
  int MaxTripCount = 0;
  bool MaxOrZero = false;
  unsigned TripMultiple = 0;
  // ...
  bool UseUpperBound = false;
  computeUnrollCount(/* ... */,
                     MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
                     UseUpperBound);
  unsigned Factor = UP.Count;
  LLVM_DEBUG(dbgs() << "Suggesting unroll factor of " << Factor << "\n");
  // ...
  assert(Factor >= 0 && "Unroll factor must not be negative");
  // ...
  LoopMDList.push_back(MDNode::get(
      Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst}));
  *UnrolledCLI = Loop;
  // ...
  assert(Factor >= 2 &&
         "unrolling only makes sense with a factor of 2 or larger");
  Type *IndVarTy = Loop->getIndVarType();
  // ...
  std::vector<CanonicalLoopInfo *> LoopNest =
      tileLoops(DL, {Loop}, {FactorVal});
  // ...
      MDNode::get(
          Ctx, {MDString::get(Ctx, "llvm.loop.unroll.count"), FactorConst})});
  // ...
  (*UnrolledCLI)->assertOK();
  Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
  // ...
  if (!CPVars.empty()) {
    // ...
  }
  Directive OMPD = Directive::OMPD_single;
  // ...
  Value *SingleArgs[] = {Ident, ThreadId};
  // ...
  auto FiniCBWrapper = [&](InsertPointTy IP) -> Error {
    if (Error Err = FiniCB(IP))
      return Err;
    // ...
  };
  // ...
      EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
                           /* ... */);
  // ...
  for (size_t I = 0, E = CPVars.size(); I < E; ++I)
    // ...
        ConstantInt::get(Int64, 0), CPVars[I],
  // ...
  } else if (!IsNowait) {
    // ...
        omp::Directive::OMPD_unknown, /*ForceSimpleCall=*/false,
    // ...
  }
  Directive OMPD = Directive::OMPD_critical;
  // ...
  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  Value *Args[] = {Ident, ThreadId, LockVar};
  // ...
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional=*/false, /*HasFinalize=*/true);
}

// ...
    const Twine &Name, bool IsDependSource) {
  // ...
         "OpenMP runtime requires depend vec with i64 type");
  // ...
  for (unsigned I = 0; I < NumLoops; ++I) {
    // ...
  }
  // ...
  Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
  Directive OMPD = Directive::OMPD_ordered;
  // ...
  Value *Args[] = {Ident, ThreadId};
  // ...
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional=*/false, /*HasFinalize=*/true);
}

// ...
    bool HasFinalize, bool IsCancellable) {
  // ...
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  // ...
  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
  // ...
         "Unexpected control flow graph state!!");
  // ...
      emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
  if (!AfterIP)
    return AfterIP.takeError();
  // ...
         "Unexpected Insertion point location!");
  auto InsertBB = merged ? ExitPredBB : ExitBB;
  // ...
  Builder.SetInsertPoint(InsertBB);
  return Builder.saveIP();
    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
  // If nothing to do, return the current insertion point.
  if (!Conditional || !EntryCall)
    return Builder.saveIP();
  // ...
  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);
  // ...
  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
  // ...
  UI->eraseFromParent();
    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
    bool HasFinalize) {
  // ...
         "Unexpected finalization stack state!");
  // ...
  assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");
  if (Error Err = Fi.mergeFiniBB(Builder, FinIP.getBlock()))
    return std::move(Err);
  // ...
  Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
  // ...
  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
                                  ExitCall->getIterator());
7352 "copyin.not.master.end");
7359 Builder.SetInsertPoint(OMP_Entry);
7360 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7361 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7362 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7363 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7365 Builder.SetInsertPoint(CopyBegin);
7382 Value *Args[] = {ThreadId,
Size, Allocator};
7399 Value *Args[] = {ThreadId, Addr, Allocator};
    Value *DependenceAddress, bool HaveNowaitClause) {
  // ...
  if (Device == nullptr)
    Device = ConstantInt::get(Int32, -1);
  Constant *InteropTypeVal = ConstantInt::get(Int32, (int)InteropType);
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int32, 0);
    // ...
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  Value *Args[] = {
      Ident, ThreadId, InteropVar, InteropTypeVal,
      Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
  // ...
}

// ...
    Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause) {
  // ...
  if (Device == nullptr)
    Device = ConstantInt::get(Int32, -1);
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int32, 0);
    // ...
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  Value *Args[] = {
      Ident, ThreadId, InteropVar, Device,
      NumDependences, DependenceAddress, HaveNowaitClauseVal};
  // ...
}

// ...
    Value *NumDependences,
    Value *DependenceAddress,
    bool HaveNowaitClause) {
  // ...
  if (Device == nullptr)
    Device = ConstantInt::get(Int32, -1);
  if (NumDependences == nullptr) {
    NumDependences = ConstantInt::get(Int32, 0);
    // ...
  }
  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
  Value *Args[] = {
      Ident, ThreadId, InteropVar, Device,
      NumDependences, DependenceAddress, HaveNowaitClauseVal};
  assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
         "expected num_threads and num_teams to be specified");
  // ...
  const std::string DebugPrefix = "_debug__";
  if (KernelName.ends_with(DebugPrefix)) {
    KernelName = KernelName.drop_back(DebugPrefix.length());
    Kernel = M.getFunction(KernelName);
    // ...
  }
  // ...
  if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
    // ...
  int32_t MaxThreadsVal = Attrs.MaxThreads.front();
  if (MaxThreadsVal < 0)
    MaxThreadsVal = std::max(
        /* ... */);
  if (MaxThreadsVal > 0)
    // ...
  Function *Fn = getOrCreateRuntimeFunctionPtr(
      omp::RuntimeFunction::OMPRTL___kmpc_target_init);
  // ...
  Twine DynamicEnvironmentName = KernelName + "_dynamic_environment";
  Constant *DynamicEnvironmentInitializer =
      /* ... */;
  GlobalVariable *DynamicEnvironmentGV = new GlobalVariable(
      M, DynamicEnvironment, /*IsConstant=*/false, GlobalValue::WeakODRLinkage,
      DynamicEnvironmentInitializer, DynamicEnvironmentName,
      /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
      DL.getDefaultGlobalsAddressSpace());
  // ...
  Constant *DynamicEnvironment =
      DynamicEnvironmentGV->getType() == DynamicEnvironmentPtr
          ? DynamicEnvironmentGV
          : ConstantExpr::getAddrSpaceCast(DynamicEnvironmentGV,
                                           DynamicEnvironmentPtr);
  // ...
  Constant *ConfigurationEnvironmentInitializer = ConstantStruct::get(
      ConfigurationEnvironment, {
                                    UseGenericStateMachineVal,
                                    MayUseNestedParallelismVal,
                                    // ...
                                    ReductionBufferLength,
                                });
  Constant *KernelEnvironmentInitializer = ConstantStruct::get(
      KernelEnvironment, {
                             ConfigurationEnvironmentInitializer,
                             // ...
                         });
  std::string KernelEnvironmentName =
      (KernelName + "_kernel_environment").str();
  GlobalVariable *KernelEnvironmentGV = new GlobalVariable(
      M, KernelEnvironment, /*IsConstant=*/true, GlobalValue::WeakODRLinkage,
      KernelEnvironmentInitializer, KernelEnvironmentName,
      /*InsertBefore=*/nullptr, GlobalValue::NotThreadLocal,
      DL.getDefaultGlobalsAddressSpace());
  // ...
  Constant *KernelEnvironment =
      KernelEnvironmentGV->getType() == KernelEnvironmentPtr
          ? KernelEnvironmentGV
          : ConstantExpr::getAddrSpaceCast(KernelEnvironmentGV,
                                           KernelEnvironmentPtr);
  Value *KernelLaunchEnvironment = DebugKernelWrapper->getArg(0);
  // ...
  KernelLaunchEnvironment =
      KernelLaunchEnvironment->getType() == KernelLaunchEnvParamTy
          ? KernelLaunchEnvironment
          : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
                                        KernelLaunchEnvParamTy);
  CallInst *ThreadKind =
      Builder.CreateCall(Fn, {KernelEnvironment, KernelLaunchEnvironment});
  // ...
  auto *UI = Builder.CreateUnreachable();
  // ...
  Builder.SetInsertPoint(WorkerExitBB);
  // ...
  Builder.SetInsertPoint(CheckBBTI);
  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
  // ...
  CheckBBTI->eraseFromParent();
  UI->eraseFromParent();
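
// Note that besides the __kmpc_target_init call itself, target initialization
// manufactured two module-level globals keyed off the kernel name:
// "<kernel>_dynamic_environment" and "<kernel>_kernel_environment"; the
// latter's configuration fields are what createTargetDeinit patches below.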
    int32_t TeamsReductionDataSize, int32_t TeamsReductionBufferLength) {
  // ...
  Function *Fn = getOrCreateRuntimeFunctionPtr(
      omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
  Builder.CreateCall(Fn, {});
  if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
    return;
  // ...
  const std::string DebugPrefix = "_debug__";
  if (KernelName.ends_with(DebugPrefix))
    KernelName = KernelName.drop_back(DebugPrefix.length());
  auto *KernelEnvironmentGV =
      M.getNamedGlobal((KernelName + "_kernel_environment").str());
  assert(KernelEnvironmentGV && "Expected kernel environment global\n");
  auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
  auto *NewInitializer = ConstantFoldInsertValueInstruction(
      KernelEnvironmentInitializer,
      ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
  NewInitializer = ConstantFoldInsertValueInstruction(
      NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
      {0, 8});
  KernelEnvironmentGV->setInitializer(NewInitializer);
}
  if (Kernel.hasFnAttribute(Name)) {
    int32_t OldLimit = Kernel.getFnAttributeAsParsedInteger(Name);
    // ...
  }
  // ...
}

std::pair<int32_t, int32_t>
OpenMPIRBuilder::readThreadBoundsForKernel(const Triple &T, Function &Kernel) {
  int32_t ThreadLimit =
      Kernel.getFnAttributeAsParsedInteger("omp_target_thread_limit");

  if (T.isAMDGPU()) {
    const auto &Attr = Kernel.getFnAttribute("amdgpu-flat-work-group-size");
    if (!Attr.isValid() || !Attr.isStringAttribute())
      return {0, ThreadLimit};
    auto [LBStr, UBStr] = Attr.getValueAsString().split(',');
    // ...
      return {0, ThreadLimit};
    UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
    // ...
  }

  if (Kernel.hasFnAttribute("nvvm.maxntid")) {
    int32_t UB = Kernel.getFnAttributeAsParsedInteger("nvvm.maxntid");
    return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
  }
  return {0, ThreadLimit};
}

// ...
  Kernel.addFnAttr("omp_target_thread_limit", std::to_string(UB));
  // ...
  Kernel.addFnAttr("amdgpu-flat-work-group-size",
                   /* ... */);
  // ...

std::pair<int32_t, int32_t>
OpenMPIRBuilder::readTeamBoundsForKernel(const Triple &, Function &Kernel) {
  // ...
  return {0, Kernel.getFnAttributeAsParsedInteger("omp_target_num_teams")};
}

// ...
    int32_t LB, int32_t UB) {
  // ...
  Kernel.addFnAttr("omp_target_num_teams", std::to_string(LB));
}
void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
    Function *OutlinedFn) {
  // ...
  if (T.isAMDGCN())
    OutlinedFn->setCallingConv(CallingConv::AMDGPU_KERNEL);
  else if (T.isNVPTX())
    OutlinedFn->setCallingConv(CallingConv::PTX_Kernel);
  else if (T.isSPIRV())
    OutlinedFn->setCallingConv(CallingConv::SPIR_KERNEL);
  // ...
}

Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
                                                    StringRef EntryFnIDName) {
  if (Config.isTargetDevice()) {
    assert(OutlinedFn && "The outlined function must exist if embedded");
    return OutlinedFn;
  }
  return new GlobalVariable(
      /* ... */);
}

Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
                                                       StringRef EntryFnName) {
  if (OutlinedFn)
    return OutlinedFn;
  assert(!M.getGlobalVariable(EntryFnName, true) &&
         "Named kernel already exists?");
  return new GlobalVariable(
      /* ... */);
}

// ...
  if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
    // ...
    OutlinedFn = *CBResult;
  } else {
    OutlinedFn = nullptr;
  }
  // ...
  if (!IsOffloadEntry)
    return Error::success();

  std::string EntryFnIDName =
      Config.isTargetDevice()
          ? std::string(EntryFnName)
          : createPlatformSpecificName({EntryFnName, "region_id"});
  // ...
      EntryFnName, EntryFnIDName);
  // ...
  if (OutlinedFn)
    setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
  auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
  auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
  OffloadInfoManager.registerTargetRegionEntryInfo(
      EntryInfo, EntryAddr, OutlinedFnID,
      /* ... */);
  return OutlinedFnID;
  bool IsStandAlone = !BodyGenCB;
  // ...
  MapInfo = &GenMapInfoCB(Builder.saveIP());
  if (Error Err = emitOffloadingArrays(
          AllocaIP, Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
          /*IsNonContiguous=*/true, DeviceAddrCB))
    return Err;
  // ...
  Value *PointerNum = Builder.getInt32(Info.NumberOfPtrs);
  // ...
      SrcLocInfo, DeviceID,
  // ...
  assert(MapperFunc && "MapperFunc missing for standalone target data");
  // ...
  if (Info.HasNoWait) {
    // ...
  }
  // ...
  if (Info.HasNoWait) {
    // ...
    emitBlock(OffloadContBlock, CurFn, /*IsFinished=*/true);
  }
  // ...
  bool RequiresOuterTargetTask = Info.HasNoWait;
  if (!RequiresOuterTargetTask)
    cantFail(TaskBodyCB(nullptr, nullptr,
                        /* ... */));
  else
    cantFail(emitTargetTask(/* ... */,
                            {}, RTArgs, Info.HasNoWait));
  // ...
      omp::OMPRTL___tgt_target_data_begin_mapper);
  // ...
  for (auto DeviceMap : Info.DevicePtrInfoMap) {
    // ...
    Builder.CreateStore(LI, DeviceMap.second.second);
  }
  // ...
  Value *EndPointerNum = Builder.getInt32(Info.NumberOfPtrs);
  // ...
  Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
                             /* ... */};
  // ...
  if (IfCond)
    return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
  return BeginThenGen(AllocaIP, Builder.saveIP());
  // ...
  if (IfCond)
    return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
  return EndThenGen(AllocaIP, Builder.saveIP());
  // ...
  if (IfCond)
    return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
  return BeginThenGen(AllocaIP, Builder.saveIP());
    bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  RuntimeFunction Name;
  if (IsGPUDistribute)
    Name = IVSize == 32
               ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
                           : omp::OMPRTL___kmpc_distribute_static_init_4u)
               : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
                           : omp::OMPRTL___kmpc_distribute_static_init_8u);
  else
    Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
                                    : omp::OMPRTL___kmpc_for_static_init_4u)
                        : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
                                    : omp::OMPRTL___kmpc_for_static_init_8u);
  return getOrCreateRuntimeFunction(M, Name);
}

// ...
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  RuntimeFunction Name = IVSize == 32
                             ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
                                         : omp::OMPRTL___kmpc_dispatch_init_4u)
                             : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
                                         : omp::OMPRTL___kmpc_dispatch_init_8u);
  return getOrCreateRuntimeFunction(M, Name);
}

// ...
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  RuntimeFunction Name = IVSize == 32
                             ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
                                         : omp::OMPRTL___kmpc_dispatch_next_4u)
                             : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
                                         : omp::OMPRTL___kmpc_dispatch_next_8u);
  return getOrCreateRuntimeFunction(M, Name);
}

// ...
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  RuntimeFunction Name = IVSize == 32
                             ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
                                         : omp::OMPRTL___kmpc_dispatch_fini_4u)
                             : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
                                         : omp::OMPRTL___kmpc_dispatch_fini_8u);
  return getOrCreateRuntimeFunction(M, Name);
}
    DenseMap<Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
  // ...
  auto GetUpdatedDIVariable = [&](DILocalVariable *OldVar, unsigned arg) {
    // ...
    if (NewVar && (arg == NewVar->getArg()))
      return NewVar;
    // ...
  };

  auto UpdateDebugRecord = [&](auto *DR) {
    // ...
    for (auto Loc : DR->location_ops()) {
      auto Iter = ValueReplacementMap.find(Loc);
      if (Iter != ValueReplacementMap.end()) {
        DR->replaceVariableLocationOp(Loc, std::get<0>(Iter->second));
        ArgNo = std::get<1>(Iter->second) + 1;
      }
    }
    // ...
    DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
  };
  // ...
         "Unexpected debug intrinsic");
  // ...
    UpdateDebugRecord(&DVR);
  // ...
  Module *M = Func->getParent();
  // ...
  DIType *VoidPtrTy =
      DB.createQualifiedType(dwarf::DW_TAG_pointer_type, nullptr);
  DILocalVariable *Var = DB.createParameterVariable(
      NewSP, "dyn_ptr", /*ArgNo=*/1, NewSP->getFile(), /*LineNo=*/0,
      VoidPtrTy, /*AlwaysPreserve=*/false, DINode::DIFlags::FlagArtificial);
  // ...
  DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(), Loc,
                   /* ... */);
    for (auto &Arg : Inputs)
      ParameterTypes.push_back(Arg->getType()->isPointerTy()
                                   ? /* ... */
                                   : Arg->getType());
  // ...
    for (auto &Arg : Inputs)
      ParameterTypes.push_back(Arg->getType());
  // ...
  auto BB = Builder.GetInsertBlock();
  auto M = BB->getModule();
  // ...
  if (TargetCpuAttr.isStringAttribute())
    Func->addFnAttr(TargetCpuAttr);

  auto TargetFeaturesAttr = ParentFn->getFnAttribute("target-features");
  if (TargetFeaturesAttr.isStringAttribute())
    Func->addFnAttr(TargetFeaturesAttr);
  // ...
  OMPBuilder.emitUsed("llvm.compiler.used", {ExecMode});
  // ...
  Builder.SetInsertPoint(EntryBB);
  // ...
  BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
  // ...
  splitBB(Builder, /*CreateBranch=*/true, "outlined.body");
  // ...
  Builder.restoreIP(*AfterIP);
  // ...
  Builder.CreateRetVoid();
  // ...
  auto AllocaIP = Builder.saveIP();
  // ...
  const auto &ArgRange =
      OMPBuilder.Config.isTargetDevice()
          ? make_range(Func->arg_begin() + 1, Func->arg_end())
          : Func->args();
  // ...
  auto ReplaceValue = [](Value *Input, Value *InputCopy, Function *Func) {
    // ...
        if (Instr->getFunction() == Func)
          Instr->replaceUsesOfWith(Input, InputCopy);
  };
  // ...
  for (auto InArg : zip(Inputs, ArgRange)) {
    Value *Input = std::get<0>(InArg);
    Argument &Arg = std::get<1>(InArg);
    Value *InputCopy = nullptr;
    // ...
        ArgAccessorFuncCB(Arg, Input, InputCopy, AllocaIP, Builder.saveIP());
    // ...
    Builder.restoreIP(*AfterIP);
    ValueReplacementMap[Input] = std::make_tuple(InputCopy, Arg.getArgNo());
    // ...
      DeferredReplacement.push_back(std::make_pair(Input, InputCopy));
    // ...
      ReplaceValue(Input, InputCopy, Func);
  }

  for (auto Deferred : DeferredReplacement)
    ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
  // ...
      ValueReplacementMap);
    Value *TaskWithPrivates, Type *TaskWithPrivatesTy) {
  // ...
  Type *TaskTy = OMPIRBuilder.Task;
  // ...
  Value *TaskT =
      Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
  Value *Shareds = TaskT;
  // ...
  if (TaskWithPrivatesTy != TaskTy)
    Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
  // ...
}

// ...
    const size_t NumOffloadingArrays, const int SharedArgsOperandNo) {
  // ...
  assert((!NumOffloadingArrays || PrivatesTy) &&
         "PrivatesTy cannot be nullptr when there are offloadingArrays"
         " to privatize");
  // ...
  Type *TaskPtrTy = OMPBuilder.TaskPtr;
  [[maybe_unused]] Type *TaskTy = OMPBuilder.Task;
  // ...
  auto ProxyFn = Function::Create(ProxyFnTy, GlobalValue::InternalLinkage,
                                  ".omp_target_task_proxy_func",
                                  Builder.GetInsertBlock()->getModule());
  Value *ThreadId = ProxyFn->getArg(0);
  Value *TaskWithPrivates = ProxyFn->getArg(1);
  ThreadId->setName("thread.id");
  TaskWithPrivates->setName("task");

  bool HasShareds = SharedArgsOperandNo > 0;
  bool HasOffloadingArrays = NumOffloadingArrays > 0;
  // ...
  Builder.SetInsertPoint(EntryBB);
  // ...
  if (HasOffloadingArrays) {
    assert(TaskTy != TaskWithPrivatesTy &&
           "If there are offloading arrays to pass to the target"
           "TaskTy cannot be the same as TaskWithPrivatesTy");
    Value *Privates =
        Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
    for (unsigned int i = 0; i < NumOffloadingArrays; ++i)
      KernelLaunchArgs.push_back(
          Builder.CreateStructGEP(PrivatesTy, Privates, i));
  }

  if (HasShareds) {
    auto *ArgStructAlloca =
        dyn_cast<AllocaInst>(StaleCI->getArgOperand(SharedArgsOperandNo));
    assert(ArgStructAlloca &&
           "Unable to find the alloca instruction corresponding to arguments "
           "for extracted function");
    // ...
    AllocaInst *NewArgStructAlloca =
        Builder.CreateAlloca(ArgStructType, nullptr, "structArg");

    Value *SharedsSize =
        Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));

    Value *LoadShared = loadSharedDataFromTaskDescriptor(
        OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);

    Builder.CreateMemCpy(
        NewArgStructAlloca, NewArgStructAlloca->getAlign(), LoadShared,
        /* ... */, SharedsSize);
    KernelLaunchArgs.push_back(NewArgStructAlloca);
  }
  // ...
  Builder.CreateRetVoid();
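
// The proxy has the shape
//   void .omp_target_task_proxy_func(i32 %thread.id, ptr %task)
// and simply re-marshals the privatized offload arrays and the
// shared-argument struct out of the task descriptor before invoking the real
// target launch code; the details above follow the task-with-privates layout
// built below.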
    return GEP->getSourceElementType();
  // ...
    return Alloca->getAllocatedType();
  // ...

// ...
  if (OffloadingArraysToPrivatize.empty())
    return OMPIRBuilder.Task;
  // ...
  for (Value *V : OffloadingArraysToPrivatize) {
    assert(V->getType()->isPointerTy() &&
           "Expected pointer to array to privatize. Got a non-pointer value "
           "instead");
    // ...
    assert(ArrayTy && "ArrayType cannot be nullptr");
    // ...
  }
  // ...
      "struct.task_with_privates");
  // ...
      EntryFnName, Inputs, CBFunc,
  // ...
      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
  Builder.SetInsertPoint(TargetTaskAllocaBB, TargetTaskAllocaBB->begin());
  // ...
  OI.EntryBB = TargetTaskAllocaBB;
  // ...
      Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP, "global.tid",
      false));
  // ...
  Builder.restoreIP(TargetTaskBodyIP);
  if (Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
    return Err;
  // ...
  bool NeedsTargetTask = HasNoWait && DeviceID;
  if (NeedsTargetTask) {
    // ...
      OffloadingArraysToPrivatize.push_back(V);
  }
  OI.PostOutlineCB = [this, ToBeDeleted, Dependencies, NeedsTargetTask,
                      DeviceID, OffloadingArraysToPrivatize](
                         Function &OutlinedFn) mutable {
    assert(OutlinedFn.hasOneUse() &&
           "there must be a single user for the outlined function");
    // ...
    const unsigned int NumStaleCIArgs = StaleCI->arg_size();
    bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.size() + 1;
    assert((!HasShareds ||
            NumStaleCIArgs == (OffloadingArraysToPrivatize.size() + 2)) &&
           "Wrong number of arguments for StaleCI when shareds are present");
    int SharedArgOperandNo =
        HasShareds ? OffloadingArraysToPrivatize.size() + 1 : 0;
    // ...
    if (!OffloadingArraysToPrivatize.empty())
      // ...
    Function *ProxyFn = emitTargetTaskProxyFunction(
        *this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
        OffloadingArraysToPrivatize.size(), SharedArgOperandNo);

    LLVM_DEBUG(dbgs() << "Proxy task entry function created: " << *ProxyFn
                      << "\n");

    Builder.SetInsertPoint(StaleCI);
    // ...
        OMPRTL___kmpc_omp_target_task_alloc);
    // ...
        M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
    // ...
      auto *ArgStructAlloca =
          dyn_cast<AllocaInst>(StaleCI->getArgOperand(SharedArgOperandNo));
      assert(ArgStructAlloca &&
             "Unable to find the alloca instruction corresponding to arguments "
             "for extracted function");
      auto *ArgStructType =
          dyn_cast<StructType>(ArgStructAlloca->getAllocatedType());
      assert(ArgStructType && "Unable to find struct type corresponding to "
                              "arguments for extracted function");
      SharedsSize =
          Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
    // ...
        TaskSize, SharedsSize,
    // ...
    if (NeedsTargetTask) {
      assert(DeviceID && "Expected non-empty device ID.");
      // ...
    }
    // ...
      Value *TaskShareds = loadSharedDataFromTaskDescriptor(
          *this, Builder, TaskData, TaskWithPrivatesTy);
      Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
                           SharedsSize);
    // ...
    if (!OffloadingArraysToPrivatize.empty()) {
      Value *Privates =
          Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
      for (unsigned int i = 0; i < OffloadingArraysToPrivatize.size(); ++i) {
        Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
        // ...
               "ElementType should match ArrayType");
        // ...
        Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
        Builder.CreateMemCpy(
            Dst, Alignment, PtrToPrivatize, Alignment,
            Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
      }
    }
    // ...
    if (!NeedsTargetTask) {
      // ...
          ConstantInt::get(Builder.getInt32Ty(), 0),
      // ...
    } else if (DepArray) {
      // ...
          {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
           DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
           /* ... */});
    }
    // ...
      I->eraseFromParent();
  };
  // ...
  LLVM_DEBUG(dbgs() << "Insert block after emitKernelLaunch = \n"
                    << *(Builder.GetInsertBlock()) << "\n");
  LLVM_DEBUG(dbgs() << "Module after emitKernelLaunch = \n"
                    << *(Builder.GetInsertBlock()->getParent()->getParent())
                    << "\n");
  // ...
          CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8957     bool HasNoWait, Value *DynCGroupMem,
8964     Builder.restoreIP(IP);
8966     return Builder.saveIP();
8969   bool HasDependencies = Dependencies.size() > 0;
8970   bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8987     if (OutlinedFnID && DeviceID)
8989           EmitTargetCallFallbackCB, KArgs,
8990           DeviceID, RTLoc, TargetTaskAllocaIP);
8998     return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
9005   auto &&EmitTargetCallElse =
9011     if (RequiresOuterTargetTask) {
9018           Dependencies, EmptyRTArgs, HasNoWait);
9020       return EmitTargetCallFallbackCB(Builder.saveIP());
9023     Builder.restoreIP(AfterIP);
9027   auto &&EmitTargetCallThen =
9030     Info.HasNoWait = HasNoWait;
9034         AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9042     NumTeamsC.push_back(RuntimeVal ? RuntimeVal
9047     auto InitMaxThreadsClause = [&Builder](Value *Clause) {
9049         Clause = Builder.CreateIntCast(Clause, Builder.getInt32Ty(),
9053     auto CombineMaxThreadsClauses = [&Builder](Value *Clause, Value *&Result) {
9056         Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result, Clause),
9064     Value *MaxThreadsClause =
9066             ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
9069     for (auto [TeamsVal, TargetVal] : zip_equal(
9071       Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9072       Value *NumThreads = InitMaxThreadsClause(TargetVal);
9074       CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9075       CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9077       NumThreadsC.push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9080     unsigned NumTargetItems = Info.NumberOfPtrs;
9088             Builder.getInt64Ty(),
9090             : Builder.getInt64(0);
9094       DynCGroupMem = Builder.getInt32(0);
9097         NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9098         HasNoWait, DynCGroupMemFallback);
9105     if (RequiresOuterTargetTask)
9107           RTLoc, AllocaIP, Dependencies,
9108           KArgs.RTArgs, Info.HasNoWait);
9111         Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9112         RuntimeAttrs.DeviceID, RTLoc, AllocaIP);
9115     Builder.restoreIP(AfterIP);
9122   if (!OutlinedFnID) {
9123     cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
9129     cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
9134                        EmitTargetCallElse, AllocaIP));
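// emitTargetCall builds two closures: EmitTargetCallThen launches the kernel
// through the offloading runtime (wrapping it in an outer target task when
// nowait or dependences require one), while EmitTargetCallElse falls back to
// the host version of the region. Without an outlined function ID the else
// path is taken unconditionally; otherwise both closures are handed to an
// if-clause helper so the choice is made at runtime.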
9161           *this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9162           OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9168   if (!Config.isTargetDevice())
9170                    IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
9171                    CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
9172                    DynCGroupMemFallback);
9186   return OS.str().str();
9191   return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
9197   auto &Elem = *InternalVars.try_emplace(Name, nullptr).first;
9199   assert(Elem.second->getValueType() == Ty &&
9200          "OMP internal variable has different type than requested");
9213              : M.getTargetTriple().isAMDGPU()
9215              : DL.getDefaultGlobalsAddressSpace();
9224   const llvm::Align PtrAlign = DL.getPointerABIAlignment(AddressSpaceVal);
9225   GV->setAlignment(std::max(TypeAlign, PtrAlign));
9232 Value *OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName) {
9233   std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
9234   std::string Name = getNameWithSeparators({Prefix, "var"}, ".", ".");
9245   return SizePtrToInt;
9250                                            std::string VarName) {
9258   return MaptypesArrayGlobal;
9263                                      unsigned NumOperands,
9272       ArrI8PtrTy, nullptr, ".offload_baseptrs");
9276       ArrI64Ty, nullptr, ".offload_sizes");
9287                                      int64_t DeviceID, unsigned NumOperands) {
9293   Value *ArgsBaseGEP =
9295       {Builder.getInt32(0), Builder.getInt32(0)});
9298       {Builder.getInt32(0), Builder.getInt32(0)});
9299   Value *ArgSizesGEP =
9301       {Builder.getInt32(0), Builder.getInt32(0)});
9305                      Builder.getInt32(NumOperands),
9306                      ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9307                      MaptypesArg, MapnamesArg, NullPtr});
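// The mapper call decays each stack array (.offload_baseptrs, .offload_ptrs,
// .offload_sizes) to a pointer to its first element with a {0, 0} GEP and
// passes those, together with the constant map-type and map-name arrays, to
// the data-mapping runtime entry.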
9314   assert((!ForEndCall || Info.separateBeginEndCalls()) &&
9315          "expected region end call to runtime only when end call is separate");
9317   auto VoidPtrTy = UnqualPtrTy;
9318   auto VoidPtrPtrTy = UnqualPtrTy;
9320   auto Int64PtrTy = UnqualPtrTy;
9322   if (!Info.NumberOfPtrs) {
9334       Info.RTArgs.BasePointersArray,
9337       ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
9341       ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
9345       ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
9346                                                  : Info.RTArgs.MapTypesArray,
9352   if (!Info.EmitDebug)
9356       ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
9361   if (!Info.HasMapper)
9365       Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
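// When a region uses separate begin/end runtime calls, the end call may see a
// different map-types array (MapTypesArrayEnd); the map-name and custom-mapper
// arrays are only forwarded when Info.EmitDebug / Info.HasMapper are set.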
9386 "struct.descriptor_dim");
9388 enum { OffsetFD = 0, CountFD, StrideFD };
9392 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
9395 if (NonContigInfo.
Dims[
I] == 1)
9400 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9402 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
9403 unsigned RevIdx = EE -
II - 1;
9406 {Builder.getInt64(0), Builder.getInt64(II)});
9408 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9410 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
9411 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9413 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9415 NonContigInfo.
Counts[L][RevIdx], CountLVal,
9416 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9418 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9420 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
9421 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9425 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
9426 DimsAddr,
Builder.getPtrTy());
9429 Info.RTArgs.PointersArray, 0,
I);
9431 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
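// Each non-contiguous list item gets an on-stack array of
// struct.descriptor_dim {offset, count, stride} records, filled in reverse
// dimension order (RevIdx), and the descriptor's address replaces the item's
// slot in the pointers array handed to the runtime.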
9436 void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
9440   StringRef Prefix = IsInit ? ".init" : ".del";
9446       Builder.CreateICmpSGT(Size, Builder.getInt64(1), "omp.arrayinit.isarray");
9447   Value *DeleteBit = Builder.CreateAnd(
9450           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9451               OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9456     Value *BaseIsBegin = Builder.CreateICmpNE(Base, Begin);
9457     Cond = Builder.CreateOr(IsArray, BaseIsBegin);
9458     DeleteCond = Builder.CreateIsNull(
9463     DeleteCond = Builder.CreateIsNotNull(
9479           ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9480               OpenMPOffloadMappingFlags::OMP_MAP_TO |
9481               OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9482   MapTypeArg = Builder.CreateOr(
9485           static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9486               OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9490   Value *OffloadingArgs[] = {MapperHandle, Base, Begin,
9491                              ArraySize, MapTypeArg, MapName};
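// The .init/.del prologue and epilogue of a user-defined mapper map the whole
// array in one runtime call: the TO/FROM bits are cleared and IMPLICIT is set
// on the map type, while the branch condition distinguishes the init case
// (size > 1 or base != begin) from the delete case via the OMP_MAP_DELETE bit.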
9517   MapperFn->addFnAttr(Attribute::NoInline);
9518   MapperFn->addFnAttr(Attribute::NoUnwind);
9528   auto SavedIP = Builder.saveIP();
9529   Builder.SetInsertPoint(EntryBB);
9541   TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
9543   Value *PtrBegin = BeginIn;
9549   emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn, Size,
9550                              MapType, MapName, ElementSize, HeadBB,
9561       Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
9562   Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9568       Builder.CreatePHI(PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
9569   PtrPHI->addIncoming(PtrBegin, HeadBB);
9574     return Info.takeError();
9578   Value *OffloadingArgs[] = {MapperHandle};
9582   Value *ShiftedPreviousSize =
9586   for (unsigned I = 0; I < Info->BasePointers.size(); ++I) {
9587     Value *CurBaseArg = Info->BasePointers[I];
9588     Value *CurBeginArg = Info->Pointers[I];
9589     Value *CurSizeArg = Info->Sizes[I];
9590     Value *CurNameArg = Info->Names.size()
9596             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9598     Value *MemberMapType =
9599         Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9616             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9617                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9618                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9628     Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9634             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9635                 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9636                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9642             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9643                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9644     Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9650             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9651                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9657             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9658                 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9659     Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9665             ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9666                 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9677     Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9678                                CurSizeArg, CurMapType, CurNameArg};
9680     auto ChildMapperFn = CustomMapperCB(I);
9682       return ChildMapperFn.takeError();
9683     if (*ChildMapperFn) {
9698   Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9699                                               "omp.arraymap.next");
9700   PtrPHI->addIncoming(PtrNext, LastBB);
9701   Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
9703   Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9708   emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn, Size,
9709                              MapType, MapName, ElementSize, DoneBB,
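// The generated mapper function is a pointer-walking loop: a PHI advances
// "omp.arraymap.ptrcurrent" element by element, and for each member the map
// type is recomputed (the alloc / to / from / tofrom arms select which
// TO/FROM bits survive) before either a child mapper or the default mapping
// runtime call handles the component.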
9723                                        bool IsNonContiguous,
9727   Info.clearArrayInfo();
9730   if (Info.NumberOfPtrs == 0)
9739   Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9740       PointerArrayType, nullptr, ".offload_baseptrs");
9742   Info.RTArgs.PointersArray = Builder.CreateAlloca(
9743       PointerArrayType, nullptr, ".offload_ptrs");
9745       PointerArrayType, nullptr, ".offload_mappers");
9746   Info.RTArgs.MappersArray = MappersArray;
9753       ConstantInt::get(Int64Ty, 0));
9755   for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
9758     if (IsNonContiguous &&
9759         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9761             OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9769       RuntimeSizes.set(I);
9772   if (RuntimeSizes.all()) {
9774     Info.RTArgs.SizesArray = Builder.CreateAlloca(
9775         SizeArrayType, nullptr, ".offload_sizes");
9781     auto *SizesArrayGbl =
9786     if (!RuntimeSizes.any()) {
9787       Info.RTArgs.SizesArray = SizesArrayGbl;
9789       unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9790       Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9793           SizeArrayType, nullptr, ".offload_sizes");
9797           Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->getType()),
9798           SizesArrayGbl, OffloadSizeAlign,
9803       Info.RTArgs.SizesArray = Buffer;
9811   for (auto mapFlag : CombinedInfo.Types)
9813         static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9817   Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9823     Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9824     Info.EmitDebug = true;
9826     Info.RTArgs.MapNamesArray =
9828     Info.EmitDebug = false;
9833   if (Info.separateBeginEndCalls()) {
9834     bool EndMapTypesDiffer = false;
9836       if (Type & static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9837                      OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9838         Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9839             OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9840         EndMapTypesDiffer = true;
9843     if (EndMapTypesDiffer) {
9845       Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9850   for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
9853         ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
9855     Builder.CreateAlignedStore(BPVal, BP,
9856                                M.getDataLayout().getPrefTypeAlign(PtrTy));
9858     if (Info.requiresDevicePointerInfo()) {
9862         Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9865           DeviceAddrCB(I, Info.DevicePtrInfoMap[BPVal].second);
9867         Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9869           DeviceAddrCB(I, BP);
9875         ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
9878     Builder.CreateAlignedStore(PVal, P,
9879                                M.getDataLayout().getPrefTypeAlign(PtrTy));
9881     if (RuntimeSizes.test(I)) {
9883           ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
9889           S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9892     unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9895     auto CustomMFunc = CustomMapperCB(I);
9897       return CustomMFunc.takeError();
9899       MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9903         {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9905         MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->getType()));
9909       Info.NumberOfPtrs == 0)
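// Sizes are split into compile-time constants and runtime values
// (RuntimeSizes): if every size is dynamic, the sizes array is a plain
// alloca; if all are constant, a global is reused directly; in the mixed
// case the constant global is memcpy'd into a stack buffer and the dynamic
// entries are stored over it. Map types carrying OMP_MAP_PRESENT get a
// second, PRESENT-stripped global (MapTypesArrayEnd) for the region's end
// call.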
9926   Builder.ClearInsertionPoint();
9956     auto CondConstant = CI->getSExtValue();
9958       return ThenGen(AllocaIP, Builder.saveIP());
9960     return ElseGen(AllocaIP, Builder.saveIP());
9988 bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9992          "Unexpected Atomic Ordering.");
10056   assert(X.Var->getType()->isPointerTy() &&
10057          "OMP Atomic expects a pointer to target memory");
10058   Type *XElemTy = X.ElemTy;
10061          "OMP atomic read expected a scalar type");
10063   Value *XRead = nullptr;
10067         Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
10076     unsigned LoadSize = DL.getTypeStoreSize(XElemTy);
10079                                   OldVal->getAlign(), true, AllocaIP, X.Var);
10081     XRead = AtomicLoadRes.first;
10088         Builder.CreateLoad(IntCastTy, X.Var, X.IsVolatile, "omp.atomic.load");
10091       XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
10093       XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
10096   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
10097   Builder.CreateStore(XRead, V.Var, V.IsVolatile);
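// Atomic reads take one of three shapes: a native atomic load for integer
// element types, an AtomicInfo-based libcall path for types that need it, or
// an atomic load through an integer of the same width followed by a
// bitcast/inttoptr for float and pointer element types.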
10108   assert(X.Var->getType()->isPointerTy() &&
10109          "OMP Atomic expects a pointer to target memory");
10110   Type *XElemTy = X.ElemTy;
10113          "OMP atomic write expected a scalar type");
10121     unsigned LoadSize = DL.getTypeStoreSize(XElemTy);
10124                                   OldVal->getAlign(), true, AllocaIP, X.Var);
10132         Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
10137   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
10144     AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr,
10145     bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
10151   Type *XTy = X.Var->getType();
10153          "OMP Atomic expects a pointer to target memory");
10154   Type *XElemTy = X.ElemTy;
10157          "OMP atomic update expected a scalar type");
10160          "OpenMP atomic does not support LT or GT operations");
10164       AllocaIP, X.Var, X.ElemTy, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
10165       IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10167     return AtomicResult.takeError();
10168   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
10173 Value *OpenMPIRBuilder::emitRMWOpAsInstruction(Value *Src1, Value *Src2,
10177     return Builder.CreateAdd(Src1, Src2);
10179     return Builder.CreateSub(Src1, Src2);
10181     return Builder.CreateAnd(Src1, Src2);
10183     return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10185     return Builder.CreateOr(Src1, Src2);
10187     return Builder.CreateXor(Src1, Src2);
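// emitRMWOpAsInstruction recomputes the post-update value of an atomicrmw as
// ordinary IR (the case labels are elided in this listing); judging from its
// use at line 10256 below, it supplies the "new" value for capture clauses,
// since the atomicrmw instruction itself only returns the old value.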
10209 Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10212     AtomicUpdateCallbackTy &UpdateOp, bool VolatileX, bool IsXBinopExpr,
10213     bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
10216   bool emitRMWOp = false;
10224     emitRMWOp = XElemTy;
10227     emitRMWOp = (IsXBinopExpr && XElemTy);
10234   std::pair<Value *, Value *> Res;
10236     AtomicRMWInst *RMWInst =
10237         Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
10238     if (T.isAMDGPU()) {
10239       if (IsIgnoreDenormalMode)
10240         RMWInst->setMetadata("amdgpu.ignore.denormal.mode",
10242       if (!IsFineGrainedMemory)
10243         RMWInst->setMetadata("amdgpu.no.fine.grained.memory",
10245       if (!IsRemoteMemory)
10249     Res.first = RMWInst;
10254       Res.second = Res.first;
10256       Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10260         Builder.CreateLoad(XElemTy, X, X->getName() + ".atomic.load");
10263     unsigned LoadSize =
10266     OpenMPIRBuilder::AtomicInfo atomicInfo(
10268         OldVal->getAlign(), true, AllocaIP, X);
10269     auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10272     CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
10279     AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
10280     NewAtomicAddr->setName(X->getName() + "x.new.val");
10281     Builder.SetInsertPoint(ContBB);
10283     PHI->addIncoming(AtomicLoadRes.first, CurBB);
10285     Expected<Value *> CBResult = UpdateOp(OldExprVal, Builder);
10288     Value *Upd = *CBResult;
10289     Builder.CreateStore(Upd, NewAtomicAddr);
10292     auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10293         AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10294     LoadInst *PHILoad = Builder.CreateLoad(XElemTy, Result.first);
10295     PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
10298     Res.first = OldExprVal;
10301     if (UnreachableInst *ExitTI =
10304       Builder.SetInsertPoint(ExitBB);
10306       Builder.SetInsertPoint(ExitTI);
10309     IntegerType *IntCastTy =
10312         Builder.CreateLoad(IntCastTy, X, X->getName() + ".atomic.load");
10321     CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
10328     AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
10329     NewAtomicAddr->setName(X->getName() + "x.new.val");
10330     Builder.SetInsertPoint(ContBB);
10332     PHI->addIncoming(OldVal, CurBB);
10337       OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
10338                                          X->getName() + ".atomic.fltCast");
10340       OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
10341                                           X->getName() + ".atomic.ptrCast");
10345     Expected<Value *> CBResult = UpdateOp(OldExprVal, Builder);
10348     Value *Upd = *CBResult;
10349     Builder.CreateStore(Upd, NewAtomicAddr);
10350     LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
10354         X, PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
10355     Result->setVolatile(VolatileX);
10356     Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
10357     Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
10358     PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
10359     Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
10361     Res.first = OldExprVal;
10365     if (UnreachableInst *ExitTI =
10368       Builder.SetInsertPoint(ExitBB);
10370       Builder.SetInsertPoint(ExitTI);
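// emitAtomicUpdate returns the pair (old value, new value). It prefers a
// single atomicrmw (tagging it with AMDGPU memory-scope metadata when
// requested); otherwise it emits a compare-exchange retry loop, either
// through AtomicInfo libcalls or through a native cmpxchg on an integer of
// the element width, with a PHI carrying the previously observed value
// around the loop.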
10381     bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr,
10382     bool IsIgnoreDenormalMode, bool IsFineGrainedMemory, bool IsRemoteMemory) {
10387   Type *XTy = X.Var->getType();
10389          "OMP Atomic expects a pointer to target memory");
10390   Type *XElemTy = X.ElemTy;
10393          "OMP atomic capture expected a scalar type");
10395          "OpenMP atomic does not support LT or GT operations");
10402       AllocaIP, X.Var, X.ElemTy, Expr, AO, AtomicOp, UpdateOp, X.IsVolatile,
10403       IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10406   Value *CapturedVal =
10407       (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
10408   Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
10410   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
10422                            IsPostfixUpdate, IsFailOnly, Failure);
10434   assert(X.Var->getType()->isPointerTy() &&
10435          "OMP atomic expects a pointer to target memory");
10438   assert(V.Var->getType()->isPointerTy() && "v.var must be of pointer type");
10439   assert(V.ElemTy == X.ElemTy && "x and v must be of same type");
10442   bool IsInteger = E->getType()->isIntegerTy();
10444   if (Op == OMPAtomicCompareOp::EQ) {
10459       Value *OldValue = Builder.CreateExtractValue(Result, 0);
10461         OldValue = Builder.CreateBitCast(OldValue, X.ElemTy);
10463              "OldValue and V must be of same type");
10464       if (IsPostfixUpdate) {
10465         Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
10467         Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
10480           CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
10482               CurBBTI, X.Var->getName() + ".atomic.exit");
10488           Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
10490           Builder.SetInsertPoint(ContBB);
10491           Builder.CreateStore(OldValue, V.Var);
10497             Builder.SetInsertPoint(ExitBB);
10499             Builder.SetInsertPoint(ExitTI);
10502           Value *CapturedValue =
10503               Builder.CreateSelect(SuccessOrFail, E, OldValue);
10504           Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10510       assert(R.Var->getType()->isPointerTy() &&
10511              "r.var must be of pointer type");
10512       assert(R.ElemTy->isIntegerTy() && "r must be of integral type");
10514       Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
10515       Value *ResultCast = R.IsSigned
10516                               ? Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
10517                               : Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
10518       Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
10521     assert((Op == OMPAtomicCompareOp::MAX || Op == OMPAtomicCompareOp::MIN) &&
10522            "Op should be either max or min at this point");
10523     assert(!IsFailOnly && "IsFailOnly is only valid when the comparison is ==");
10534     if (IsXBinopExpr) {
10563       Value *CapturedValue = nullptr;
10564       if (IsPostfixUpdate) {
10565         CapturedValue = OldValue;
10590         Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue, E);
10591         CapturedValue = Builder.CreateSelect(NonAtomicCmp, E, OldValue);
10593       Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10597   checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Compare);
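// For EQ comparisons the lowering is a cmpxchg whose {old, success} result
// drives the optional v/r captures; for MIN/MAX it is an atomicrmw, and a
// captured "new" value is reconstructed non-atomically with a compare+select
// over the returned old value.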
10617   if (&OuterAllocaBB == Builder.GetInsertBlock()) {
10644   bool SubClausesPresent =
10645       (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10647   if (!Config.isTargetDevice() && SubClausesPresent) {
10648     assert((NumTeamsLower == nullptr || NumTeamsUpper != nullptr) &&
10649            "if lowerbound is non-null, then upperbound must also be non-null "
10650            "for bounds on num_teams");
10652     if (NumTeamsUpper == nullptr)
10653       NumTeamsUpper = Builder.getInt32(0);
10655     if (NumTeamsLower == nullptr)
10656       NumTeamsLower = NumTeamsUpper;
10660              "argument to if clause must be an integer value");
10664       IfExpr = Builder.CreateICmpNE(IfExpr,
10665                                     ConstantInt::get(IfExpr->getType(), 0));
10666       NumTeamsUpper = Builder.CreateSelect(
10667           IfExpr, NumTeamsUpper, Builder.getInt32(1), "numTeamsUpper");
10670       NumTeamsLower = Builder.CreateSelect(
10671           IfExpr, NumTeamsLower, Builder.getInt32(1), "numTeamsLower");
10674     if (ThreadLimit == nullptr)
10675       ThreadLimit = Builder.getInt32(0);
10679     Value *NumTeamsLowerInt32 =
10681     Value *NumTeamsUpperInt32 =
10683     Value *ThreadLimitInt32 =
10690         {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
10691          ThreadLimitInt32});
10696   if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10708       Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "gid", true));
10710       Builder, OuterAllocaIP, ToBeDeleted, AllocaIP, "tid", true));
10712   auto HostPostOutlineCB = [this, Ident,
10713                             ToBeDeleted](Function &OutlinedFn) mutable {
10718            "there must be a single user for the outlined function");
10723            "Outlined function must have two or three arguments only");
10725     bool HasShared = OutlinedFn.arg_size() == 3;
10733     assert(StaleCI && "Error while outlining - no CallInst user found for the "
10734                       "outlined function.");
10735     Builder.SetInsertPoint(StaleCI);
10742             omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10746       I->eraseFromParent();
10749   if (!Config.isTargetDevice())
10768   if (OuterAllocaBB == Builder.GetInsertBlock()) {
10783   if (Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10788   if (Config.isTargetDevice()) {
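// On the host, createTeams clamps the num_teams bounds with the if clause
// (selecting a single team when the condition is false), pushes the bounds
// and thread limit to the runtime before the region, and, after outlining,
// replaces the stale call with __kmpc_fork_teams; the outlined body receives
// fake gid/tid values that are cleaned up afterwards.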
10803                                       std::string VarName) {
10812   return MapNamesArrayGlobal;
10817 void OpenMPIRBuilder::initializeTypes(Module &M) {
10821   unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
10822 #define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10823 #define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10824   VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10825   VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10826 #define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10827   VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10828   VarName##Ptr = PointerType::get(Ctx, ProgramAS);
10829 #define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10830   T = StructType::getTypeByName(Ctx, StructName); \
10832   T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10834   VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10835 #include "llvm/Frontend/OpenMP/OMPKinds.def"
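// initializeTypes is an X-macro expansion: OMPKinds.def supplies one
// OMP_TYPE / OMP_ARRAY_TYPE / OMP_FUNCTION_TYPE / OMP_STRUCT_TYPE invocation
// per runtime type, so every type member of the builder is populated by the
// single #include. Function pointers live in the program address space;
// everything else uses the default globals address space.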
10846   while (!Worklist.empty()) {
10850       if (BlockSet.insert(SuccBB).second)
10862                                    Name.empty() ? Addr->getName() : Name,
                                      Size, Flags, 0);
10874   Fn->addFnAttr("uniform-work-group-size", "true");
10875   Fn->addFnAttr(Attribute::MustProgress);
10893   auto &&GetMDInt = [this](unsigned V) {
10900   NamedMDNode *MD = M.getOrInsertNamedMetadata("omp_offload.info");
10901   auto &&TargetRegionMetadataEmitter =
10902       [&C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10917             GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
10918             GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10919             GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10920             GetMDInt(E.getOrder())};
10923         OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
10932   auto &&DeviceGlobalVarMetadataEmitter =
10933       [&C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10943         Metadata *Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
10944                            GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
10948         OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
10955                                       DeviceGlobalVarMetadataEmitter);
10957   for (const auto &E : OrderedEntries) {
10958     assert(E.first && "All ordered entries must exist!");
10959     if (const auto *CE =
10962       if (!CE->getID() || !CE->getAddress()) {
10966         if (!M.getNamedValue(FnName))
10974     } else if (const auto *CE = dyn_cast<
10983       if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10985         if (!CE->getAddress()) {
10990         if (CE->getVarSize() == 0)
10994         assert(((Config.isTargetDevice() && !CE->getAddress()) ||
10995                 (!Config.isTargetDevice() && CE->getAddress())) &&
10996                "Declare target link address is set.");
10997         if (Config.isTargetDevice())
10999           if (!CE->getAddress()) {
11006       if (!CE->getAddress()) {
11019         if ((GV->hasLocalLinkage() || GV->hasHiddenVisibility()) &&
11023                 OMPTargetGlobalVarEntryIndirectVTable))
11032                            Flags, CE->getLinkage(), CE->getVarName());
11035                            Flags, CE->getLinkage());
11046   if (Config.hasRequiresFlags() && !Config.isTargetDevice())
11052                                  Config.getRequiresFlags());
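// Each target region is recorded in the module-level "omp_offload.info"
// named metadata as the tuple {kind, device ID, file ID, parent name, line,
// count, order}; device global variables use the shorter tuple
// {kind, mangled name, flags, order}. The entries are then re-walked in
// order to emit the actual offload entries.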
11062   OS << "_" << Count;
11067   unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11070                                EntryInfo.Line, NewCount);
11078   auto FileIDInfo = CallBack();
11082     FileID = Status->getUniqueID().getFile();
11086     FileID = hash_value(std::get<0>(FileIDInfo));
11090                                    std::get<1>(FileIDInfo));
11096        static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
11098        !(Remain & 1); Remain = Remain >> 1)
11116   if (static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
11118       static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
11125   if (static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>>(
11131   Flags &= ~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11132   Flags |= MemberOfFlag;
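// The MEMBER_OF field lives in the high bits of the map-type flags: the shift
// loop above (line 11098) walks to the first set bit of the mask to find the
// field's offset, and the final two statements clear any stale MEMBER_OF
// value before OR-ing in the new one.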
11138     bool IsDeclaration, bool IsExternallyVisible,
11140     std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
11141     std::vector<Triple> TargetTriple, Type *LlvmPtrTy,
11142     std::function<Constant *()> GlobalInitializer,
11153        Config.hasRequiresUnifiedSharedMemory())) {
11158     if (!IsExternallyVisible)
11160     OS << "_decl_tgt_ref_ptr";
11163     Value *Ptr = M.getNamedValue(PtrName);
11172       if (!Config.isTargetDevice()) {
11173         if (GlobalInitializer)
11174           GV->setInitializer(GlobalInitializer());
11180           CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11181           EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11182           GlobalInitializer, VariableLinkage, LlvmPtrTy, cast<Constant>(Ptr));
11194     bool IsDeclaration, bool IsExternallyVisible,
11196     std::vector<GlobalVariable *> &GeneratedRefs, bool OpenMPSIMD,
11197     std::vector<Triple> TargetTriple,
11198     std::function<Constant *()> GlobalInitializer,
11202       (TargetTriple.empty() && !Config.isTargetDevice()))
11213       !Config.hasRequiresUnifiedSharedMemory()) {
11215     VarName = MangledName;
11218     if (!IsDeclaration)
11220           M.getDataLayout().getTypeSizeInBits(LlvmVal->getValueType()), 8);
11223     Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->getLinkage();
11227     if (Config.isTargetDevice() &&
11236       if (!M.getNamedValue(RefName)) {
11240         GvAddrRef->setConstant(true);
11242         GvAddrRef->setInitializer(Addr);
11243         GeneratedRefs.push_back(GvAddrRef);
11252     if (Config.isTargetDevice()) {
11253       VarName = (Addr) ? Addr->getName() : "";
11257           CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11258           EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11259           LlvmPtrTy, GlobalInitializer, VariableLinkage);
11260       VarName = (Addr) ? Addr->getName() : "";
11262       VarSize = M.getDataLayout().getPointerSize();
11281   auto &&GetMDInt = [MN](unsigned Idx) {
11286   auto &&GetMDString = [MN](unsigned Idx) {
11288     return V->getString();
11291     switch (GetMDInt(0)) {
11295     case OffloadEntriesInfoManager::OffloadEntryInfo::
11296         OffloadingEntryInfoTargetRegion: {
11306     case OffloadEntriesInfoManager::OffloadEntryInfo::
11307         OffloadingEntryInfoDeviceGlobalVar:
11320   if (HostFilePath.empty())
11324   if (std::error_code Err = Buf.getError()) {
11326                              "OpenMPIRBuilder: " +
11334   if (std::error_code Err = M.getError()) {
11336         ("error parsing host file inside of OpenMPIRBuilder: " + Err.message())
11348   return OffloadEntriesTargetRegion.empty() &&
11349          OffloadEntriesDeviceGlobalVar.empty();
11352 unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
11354   auto It = OffloadEntriesTargetRegionCount.find(
11355       getTargetRegionEntryCountKey(EntryInfo));
11356   if (It == OffloadEntriesTargetRegionCount.end())
11361 void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
11363   OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
11364       EntryInfo.Count + 1;
11370   OffloadEntriesTargetRegion[EntryInfo] =
11373   ++OffloadingEntriesNum;
11379   assert(EntryInfo.Count == 0 && "expected default EntryInfo");
11382   EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
11386   if (OMPBuilder->Config.isTargetDevice()) {
11391     auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
11392     Entry.setAddress(Addr);
11394     Entry.setFlags(Flags);
11400            "Target region entry already registered!");
11402     OffloadEntriesTargetRegion[EntryInfo] = Entry;
11403     ++OffloadingEntriesNum;
11405   incrementTargetRegionEntryInfoCount(EntryInfo);
11412   EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
11414   auto It = OffloadEntriesTargetRegion.find(EntryInfo);
11415   if (It == OffloadEntriesTargetRegion.end()) {
11419   if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
11427   for (const auto &It : OffloadEntriesTargetRegion) {
11428     Action(It.first, It.second);
11434   OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
11435   ++OffloadingEntriesNum;
11441   if (OMPBuilder->Config.isTargetDevice()) {
11445     auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
11447     if (Entry.getVarSize() == 0) {
11448       Entry.setVarSize(VarSize);
11449       Entry.setLinkage(Linkage);
11453     Entry.setVarSize(VarSize);
11454     Entry.setLinkage(Linkage);
11455     Entry.setAddress(Addr);
11458   auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
11459   assert(Entry.isValid() && Entry.getFlags() == Flags &&
11460          "Entry not initialized!");
11461   if (Entry.getVarSize() == 0) {
11462     Entry.setVarSize(VarSize);
11463     Entry.setLinkage(Linkage);
11470   OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
11471                                             Addr, VarSize, Flags, Linkage,
11474   OffloadEntriesDeviceGlobalVar.try_emplace(
11475       VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage, "");
11476   ++OffloadingEntriesNum;
11483   for (const auto &E : OffloadEntriesDeviceGlobalVar)
11484     Action(E.getKey(), E.getValue());
11491 void CanonicalLoopInfo::collectControlBlocks(
11498   BBs.append({getPreheader(), Header, Cond, Latch, Exit, getAfter()});
11510 void CanonicalLoopInfo::setTripCount(Value *TripCount) {
11522 void CanonicalLoopInfo::mapIndVar(
11532   for (Use &U : OldIV->uses()) {
11536     if (User->getParent() == getCond())
11538     if (User->getParent() == getLatch())
11544   Value *NewIV = Updater(OldIV);
11547   for (Use *U : ReplacableUses)
11568          "Preheader must terminate with unconditional branch");
11570          "Preheader must jump to header");
11574          "Header must terminate with unconditional branch");
11575   assert(Header->getSingleSuccessor() == Cond &&
11576          "Header must jump to exiting block");
11579   assert(Cond->getSinglePredecessor() == Header &&
11580          "Exiting block only reachable from header");
11583          "Exiting block must terminate with conditional branch");
11585          "Exiting block must have two successors");
11587          "Exiting block's first successor must jump to the body");
11589          "Exiting block's second successor must exit the loop");
11593          "Body only reachable from exiting block");
11598          "Latch must terminate with unconditional branch");
11599   assert(Latch->getSingleSuccessor() == Header && "Latch must jump to header");
11602   assert(Latch->getSinglePredecessor() != nullptr);
11607          "Exit block must terminate with unconditional branch");
11608   assert(Exit->getSingleSuccessor() == After &&
11609          "Exit block must jump to after block");
11613          "After block only reachable from exit block");
11617   assert(IndVar && "Canonical induction variable not found?");
11619          "Induction variable must be an integer");
11621          "Induction variable must be a PHI in the loop header");
11627   auto *NextIndVar = cast<PHINode>(IndVar)->getIncomingValue(1);
11635   assert(TripCount && "Loop trip count not found?");
11637          "Trip count and induction variable must have the same type");
11641          "Exit condition must be a signed less-than comparison");
11643          "Exit condition must compare the induction variable");
11645          "Exit condition must compare with the trip count");
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
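A sketch combining FunctionType::get and Function::Create (assuming an existing Module M and its LLVMContext Ctx):

    llvm::FunctionType *FTy = llvm::FunctionType::get(
        llvm::Type::getVoidTy(Ctx), {llvm::Type::getInt32Ty(Ctx)}, /*isVarArg=*/false);
    llvm::Function *F = llvm::Function::Create(
        FTy, llvm::GlobalValue::InternalLinkage, /*AddrSpace=*/0, "helper", &M);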
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
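A minimal sketch of the save/restore idiom (assuming an IRBuilder<> Builder and a BasicBlock *BB to emit into temporarily):

    llvm::IRBuilder<>::InsertPoint OldIP = Builder.saveIP();
    Builder.SetInsertPoint(BB);   // emit somewhere else for a while
    // ... create instructions ...
    Builder.restoreIP(OldIP);     // back to the saved block/point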
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
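For illustration, these factories combine to build loop metadata roughly like so (a sketch, assuming an LLVMContext Ctx; the string is just an example metadata kind):

    llvm::MDNode *Enable = llvm::MDTuple::get(
        Ctx, {llvm::MDString::get(Ctx, "llvm.loop.unroll.enable")});
    // getDistinct() yields a node that is not uniqued with structurally equal
    // tuples, which is what self-referential loop IDs require.
    llvm::MDNode *LoopID = llvm::MDTuple::getDistinct(Ctx, {nullptr, Enable});
    LoopID->replaceOperandWith(0, LoopID); // make the node point at itself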
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions. NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a declare target to.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
ReductionGenCBKind
Enum class for the ReductionGen callback type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state; this will put structure types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for: X = Expr. Only scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for the OpenMP 'if' clause using the specified BodyGenCallbackTy. Here is the logic: if (Cond) { Th...
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetGenArgAccessorsCallbackTy
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be freed.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the alloca instructions used in calls to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs (only scalar data types). cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetBodyGenCallbackTy
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_Alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive splits and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be freed.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs (only scalar data types): V = X; X = X BinOp Expr, ...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X. For complex operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const SmallVector< DependData > &Dependencies, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic read for: V = X. Only scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after the scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, the input phase is being lowered; otherwise the scan phase is being lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
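A typical visited-set sketch using the insert/count API above (assuming a Value *V being walked):

    llvm::SmallPtrSet<llvm::Value *, 8> Seen;
    if (Seen.insert(V).second) {
      // .second is true iff V was newly inserted, i.e. this is the first visit
    }
    bool AlreadyKnown = Seen.count(V); // 1 if present, 0 otherwise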
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
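A small sketch of the StringRef queries listed above:

    llvm::StringRef S("kernel_impl");
    if (S.ends_with("_impl"))
      S = S.drop_back(5);              // now "kernel"
    size_t Underscores = S.count('_'); // 0 after the drop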
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
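Sketch of literal vs. identified struct creation (assuming an LLVMContext Ctx):

    llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
    llvm::StructType *Literal = llvm::StructType::get(Ctx, {I32, I32});  // uniqued by shape
    llvm::StructType *Named = llvm::StructType::create(Ctx, "my.pair");  // identified, body set later
    Named->setBody({I32, I32});
    llvm::Type *First = Named->getElementType(0);                        // i32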
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
bool isVoidTy() const
Return true if this is 'void'.
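The predicate accessors above support simple type dispatch; in this sketch, V is an assumed Value* and handlePointer()/handleInt32() are hypothetical:

    llvm::Type *Ty = V->getType();
    if (Ty->isPointerTy())
      handlePointer();    // hypothetical
    else if (Ty->isIntegerTy() && Ty->getIntegerBitWidth() == 32)
      handleInt32();      // hypothetical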
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
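A sketch of reading through the VFS interface (assuming a vfs::FileSystem &FS; the path is illustrative):

    llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> BufOrErr =
        FS.getBufferForFile("input.ll");
    if (std::error_code EC = BufOrErr.getError())
      return; // handle EC
    llvm::StringRef Contents = (*BufOrErr)->getBuffer();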
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
initializer< Ty > init(const Ty &Val)
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a BasicBlock.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
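For example, the range form removes the begin/end boilerplate:

    llvm::SmallVector<int, 4> Vals = {2, 4, 6};
    bool AllEven = llvm::all_of(Vals, [](int V) { return V % 2 == 0; }); // true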
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
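A sketch of zip_equal and enumerate over two same-length containers A and B (assumed):

    for (auto Pair : llvm::zip_equal(A, B)) {
      // std::get<0>(Pair) comes from A, std::get<1>(Pair) from B
    }
    for (const auto &E : llvm::enumerate(A)) {
      // E.index() is the position, E.value() the element
    }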
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
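A sketch using the signature above to split at the builder's current position and fall through into the new block (assuming an IRBuilderBase Builder; the block name is illustrative):

    llvm::BasicBlock *Cont = llvm::splitBB(Builder.saveIP(), /*CreateBranch=*/true,
                                           Builder.getCurrentDebugLoc(), "omp.cont");
    // The old block now ends in a branch to Cont; instructions after the IP moved into Cont.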
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
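cast<> pairs with the isa<> check above; a sketch on an assumed Value *V:

  if (isa<LoadInst>(V))
    cast<LoadInst>(V)->setVolatile(true); // cast asserts instead of re-checking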
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
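A sketch, assuming a block `Old`, an instruction `SplitPt` inside it, and a DominatorTree `DT`:

  BasicBlock *Tail = SplitBlock(Old, SplitPt->getIterator(), &DT);
  // Old now ends in an unconditional branch to Tail, which begins at SplitPt.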
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
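For example:

  for (unsigned I : llvm::seq(0u, 4u))
    dbgs() << I << ' '; // prints 0 1 2 3; End is exclusive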
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks in BBs; each deleted block must have no predecessors outside the set being deleted.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0, the radix is auto-detected from the string's prefix (e.g. "0x" for hexadecimal).
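A worked example of the radix auto-detection:

  unsigned V = 0;
  bool OK = llvm::to_integer(StringRef("0x20"), V, /*Base=*/0); // V == 32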
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
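A sketch, assuming a Loop *L and an AssumptionCache `AC` (the declaring class is CodeMetrics); the resulting set is what computeUnrollCount above takes as EphValues:

  SmallPtrSet<const Value *, 8> EphValues;
  CodeMetrics::collectEphemeralValues(L, &AC, EphValues);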
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack relevant information while generating atomic Ops.
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of an LLVM-IR insertion point (IP) and a debug/source location (filename, line, column).
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers, pointers, sizes, map types, user-defined mappers, and non-contiguous information.
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
bool FixUpNonEntryAllocas
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there are no separate map types for the region end.
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entire region if there are no separate map types for the region end.
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel attributes and populate associated static structures.
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with which the kernel must be launched, as well as the trip count of the loop, if it is an SPMD or Generic-SPMD kernel.
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
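A hedged sketch of deriving an entry's function name; the enclosing struct is assumed to be TargetRegionEntryInfo and the coordinates are purely illustrative:

  SmallString<128> FnName;
  TargetRegionEntryInfo::getTargetRegionEntryFnName(
      FnName, /*ParentName=*/"foo", /*DeviceID=*/0, /*FileID=*/1,
      /*Line=*/42, /*Count=*/0);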
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin), device RTL, and clang.