69#define DEBUG_TYPE "openmp-ir-builder"
76 cl::desc(
"Use optimistic attributes describing "
77 "'as-if' properties of runtime calls."),
81 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
82 cl::desc(
"Factor for the unroll threshold to account for code "
83 "simplifications still taking place"),
94 if (!IP1.isSet() || !IP2.isSet())
96 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
101 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
102 case OMPScheduleType::UnorderedStaticChunked:
103 case OMPScheduleType::UnorderedStatic:
104 case OMPScheduleType::UnorderedDynamicChunked:
105 case OMPScheduleType::UnorderedGuidedChunked:
106 case OMPScheduleType::UnorderedRuntime:
107 case OMPScheduleType::UnorderedAuto:
108 case OMPScheduleType::UnorderedTrapezoidal:
109 case OMPScheduleType::UnorderedGreedy:
110 case OMPScheduleType::UnorderedBalanced:
111 case OMPScheduleType::UnorderedGuidedIterativeChunked:
112 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
113 case OMPScheduleType::UnorderedSteal:
114 case OMPScheduleType::UnorderedStaticBalancedChunked:
115 case OMPScheduleType::UnorderedGuidedSimd:
116 case OMPScheduleType::UnorderedRuntimeSimd:
117 case OMPScheduleType::OrderedStaticChunked:
118 case OMPScheduleType::OrderedStatic:
119 case OMPScheduleType::OrderedDynamicChunked:
120 case OMPScheduleType::OrderedGuidedChunked:
121 case OMPScheduleType::OrderedRuntime:
122 case OMPScheduleType::OrderedAuto:
123 case OMPScheduleType::OrderdTrapezoidal:
124 case OMPScheduleType::NomergeUnorderedStaticChunked:
125 case OMPScheduleType::NomergeUnorderedStatic:
126 case OMPScheduleType::NomergeUnorderedDynamicChunked:
127 case OMPScheduleType::NomergeUnorderedGuidedChunked:
128 case OMPScheduleType::NomergeUnorderedRuntime:
129 case OMPScheduleType::NomergeUnorderedAuto:
130 case OMPScheduleType::NomergeUnorderedTrapezoidal:
131 case OMPScheduleType::NomergeUnorderedGreedy:
132 case OMPScheduleType::NomergeUnorderedBalanced:
133 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
134 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
135 case OMPScheduleType::NomergeUnorderedSteal:
136 case OMPScheduleType::NomergeOrderedStaticChunked:
137 case OMPScheduleType::NomergeOrderedStatic:
138 case OMPScheduleType::NomergeOrderedDynamicChunked:
139 case OMPScheduleType::NomergeOrderedGuidedChunked:
140 case OMPScheduleType::NomergeOrderedRuntime:
141 case OMPScheduleType::NomergeOrderedAuto:
142 case OMPScheduleType::NomergeOrderedTrapezoidal:
143 case OMPScheduleType::OrderedDistributeChunked:
144 case OMPScheduleType::OrderedDistribute:
152 SchedType & OMPScheduleType::MonotonicityMask;
153 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
165 Builder.restoreIP(IP);
173 return T.isAMDGPU() ||
T.isNVPTX() ||
T.isSPIRV();
179 Kernel->getFnAttribute(
"target-features").getValueAsString();
180 if (Features.
count(
"+wavefrontsize64"))
195 bool HasSimdModifier,
bool HasDistScheduleChunks) {
197 switch (ClauseKind) {
198 case OMP_SCHEDULE_Default:
199 case OMP_SCHEDULE_Static:
200 return HasChunks ? OMPScheduleType::BaseStaticChunked
201 : OMPScheduleType::BaseStatic;
202 case OMP_SCHEDULE_Dynamic:
203 return OMPScheduleType::BaseDynamicChunked;
204 case OMP_SCHEDULE_Guided:
205 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
206 : OMPScheduleType::BaseGuidedChunked;
207 case OMP_SCHEDULE_Auto:
209 case OMP_SCHEDULE_Runtime:
210 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
211 : OMPScheduleType::BaseRuntime;
212 case OMP_SCHEDULE_Distribute:
213 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
214 : OMPScheduleType::BaseDistribute;
222 bool HasOrderedClause) {
223 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
224 OMPScheduleType::None &&
225 "Must not have ordering nor monotonicity flags already set");
228 ? OMPScheduleType::ModifierOrdered
229 : OMPScheduleType::ModifierUnordered;
230 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
233 if (OrderingScheduleType ==
234 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
235 return OMPScheduleType::OrderedGuidedChunked;
236 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
237 OMPScheduleType::ModifierOrdered))
238 return OMPScheduleType::OrderedRuntime;
240 return OrderingScheduleType;
246 bool HasSimdModifier,
bool HasMonotonic,
247 bool HasNonmonotonic,
bool HasOrderedClause) {
248 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
249 OMPScheduleType::None &&
250 "Must not have monotonicity flags already set");
251 assert((!HasMonotonic || !HasNonmonotonic) &&
252 "Monotonic and Nonmonotonic are contradicting each other");
255 return ScheduleType | OMPScheduleType::ModifierMonotonic;
256 }
else if (HasNonmonotonic) {
257 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
267 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
268 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
274 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
282 bool HasSimdModifier,
bool HasMonotonicModifier,
283 bool HasNonmonotonicModifier,
bool HasOrderedClause,
284 bool HasDistScheduleChunks) {
286 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
290 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
291 HasNonmonotonicModifier, HasOrderedClause);
304 if (
Instruction *Term = Source->getTerminatorOrNull()) {
313 NewBr->setDebugLoc(
DL);
318 assert(New->getFirstInsertionPt() == New->begin() &&
319 "Target BB must not have PHI nodes");
335 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
339 NewBr->setDebugLoc(
DL);
351 Builder.SetInsertPoint(Old);
355 Builder.SetCurrentDebugLocation(
DebugLoc);
365 New->replaceSuccessorsPhiUsesWith(Old, New);
374 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
376 Builder.SetInsertPoint(Builder.GetInsertBlock());
379 Builder.SetCurrentDebugLocation(
DebugLoc);
388 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
390 Builder.SetInsertPoint(Builder.GetInsertBlock());
393 Builder.SetCurrentDebugLocation(
DebugLoc);
410 const Twine &Name =
"",
bool AsPtr =
true,
411 bool Is64Bit =
false) {
412 Builder.restoreIP(OuterAllocaIP);
416 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
420 FakeVal = FakeValAddr;
422 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
427 Builder.restoreIP(InnerAllocaIP);
430 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
433 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
446enum OpenMPOffloadingRequiresDirFlags {
448 OMP_REQ_UNDEFINED = 0x000,
450 OMP_REQ_NONE = 0x001,
452 OMP_REQ_REVERSE_OFFLOAD = 0x002,
454 OMP_REQ_UNIFIED_ADDRESS = 0x004,
456 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
458 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
465 : RequiresFlags(OMP_REQ_UNDEFINED) {}
469 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
470 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
473 RequiresFlags(OMP_REQ_UNDEFINED) {
474 if (HasRequiresReverseOffload)
475 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
476 if (HasRequiresUnifiedAddress)
477 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
478 if (HasRequiresUnifiedSharedMemory)
479 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
480 if (HasRequiresDynamicAllocators)
481 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
485 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
489 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
493 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
497 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
502 :
static_cast<int64_t
>(OMP_REQ_NONE);
507 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
509 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
514 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
516 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
521 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
523 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
528 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
530 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
543 constexpr size_t MaxDim = 3;
548 Value *DynCGroupMemFallbackFlag =
550 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
551 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
557 Value *NumThreads3D =
588 auto FnAttrs = Attrs.getFnAttrs();
589 auto RetAttrs = Attrs.getRetAttrs();
591 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
596 bool Param =
true) ->
void {
597 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
598 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
599 if (HasSignExt || HasZeroExt) {
600 assert(AS.getNumAttributes() == 1 &&
601 "Currently not handling extension attr combined with others.");
603 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
606 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
613#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
614#include "llvm/Frontend/OpenMP/OMPKinds.def"
618#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
620 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
621 addAttrSet(RetAttrs, RetAttrSet, false); \
622 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
623 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
624 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
626#include "llvm/Frontend/OpenMP/OMPKinds.def"
640#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
642 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
644 Fn = M.getFunction(Str); \
646#include "llvm/Frontend/OpenMP/OMPKinds.def"
652#define OMP_RTL(Enum, Str, ...) \
654 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
656#include "llvm/Frontend/OpenMP/OMPKinds.def"
660 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
670 LLVMContext::MD_callback,
672 2, {-1, -1},
true)}));
685 assert(Fn &&
"Failed to create OpenMP runtime function");
696 Builder.SetInsertPoint(FiniBB);
708 FiniBB = OtherFiniBB;
710 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
718 auto EndIt = FiniBB->end();
719 if (FiniBB->size() >= 1)
720 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
725 FiniBB->replaceAllUsesWith(OtherFiniBB);
726 FiniBB->eraseFromParent();
727 FiniBB = OtherFiniBB;
734 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
757 for (
auto Inst =
Block->getReverseIterator()->begin();
758 Inst !=
Block->getReverseIterator()->end();) {
787 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
812 ParallelRegionBlockSet.
clear();
814 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
824 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
833 ".omp_par", ArgsInZeroAddressSpace);
837 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
838 assert(Extractor.isEligible() &&
839 "Expected OpenMP outlining to be possible!");
841 for (
auto *V : OI.ExcludeArgsFromAggregate)
842 Extractor.excludeArgFromAggregate(V);
845 Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);
849 if (TargetCpuAttr.isStringAttribute())
852 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
853 if (TargetFeaturesAttr.isStringAttribute())
854 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
857 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
859 "OpenMP outlined functions should not return a value!");
864 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
871 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
878 "Expected instructions to add in the outlined region entry");
880 End = ArtificialEntry.
rend();
885 if (
I.isTerminator()) {
887 if (
Instruction *TI = OI.EntryBB->getTerminatorOrNull())
888 TI->adoptDbgRecords(&ArtificialEntry,
I.getIterator(),
false);
892 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
895 OI.EntryBB->moveBefore(&ArtificialEntry);
902 if (OI.PostOutlineCB)
903 OI.PostOutlineCB(*OutlinedFn);
905 if (OI.FixUpNonEntryAllocas)
937 errs() <<
"Error of kind: " << Kind
938 <<
" when emitting offload entries and metadata during "
939 "OMPIRBuilder finalization \n";
945 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
946 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
947 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
948 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
965 ConstantInt::get(I32Ty,
Value), Name);
978 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
982 if (UsedArray.
empty())
989 GV->setSection(
"llvm.metadata");
995 auto *Int8Ty =
Builder.getInt8Ty();
998 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1006 unsigned Reserve2Flags) {
1008 LocFlags |= OMP_IDENT_FLAG_KMPC;
1015 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1016 ConstantInt::get(Int32, Reserve2Flags),
1017 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1019 size_t SrcLocStrArgIdx = 4;
1020 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1024 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1031 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1032 if (
GV.getInitializer() == Initializer)
1037 M, OpenMPIRBuilder::Ident,
1040 M.getDataLayout().getDefaultGlobalsAddressSpace());
1052 SrcLocStrSize = LocStr.
size();
1061 if (
GV.isConstant() &&
GV.hasInitializer() &&
1062 GV.getInitializer() == Initializer)
1065 SrcLocStr =
Builder.CreateGlobalString(
1066 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1074 unsigned Line,
unsigned Column,
1080 Buffer.
append(FunctionName);
1082 Buffer.
append(std::to_string(Line));
1084 Buffer.
append(std::to_string(Column));
1092 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1103 if (
DIFile *DIF = DIL->getFile())
1104 if (std::optional<StringRef> Source = DIF->getSource())
1110 DIL->getColumn(), SrcLocStrSize);
1116 Loc.IP.getBlock()->getParent());
1122 "omp_global_thread_num");
1127 bool ForceSimpleCall,
bool CheckCancelFlag) {
1137 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1140 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1143 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1146 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1149 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1162 bool UseCancelBarrier =
1167 ? OMPRTL___kmpc_cancel_barrier
1168 : OMPRTL___kmpc_barrier),
1171 if (UseCancelBarrier && CheckCancelFlag)
1181 omp::Directive CanceledDirective) {
1186 auto *UI =
Builder.CreateUnreachable();
1194 Builder.SetInsertPoint(ElseTI);
1195 auto ElseIP =
Builder.saveIP();
1203 Builder.SetInsertPoint(ThenTI);
1205 Value *CancelKind =
nullptr;
1206 switch (CanceledDirective) {
1207#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1208 case DirectiveEnum: \
1209 CancelKind = Builder.getInt32(Value); \
1211#include "llvm/Frontend/OpenMP/OMPKinds.def"
1228 Builder.SetInsertPoint(UI->getParent());
1229 UI->eraseFromParent();
1236 omp::Directive CanceledDirective) {
1241 auto *UI =
Builder.CreateUnreachable();
1244 Value *CancelKind =
nullptr;
1245 switch (CanceledDirective) {
1246#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1247 case DirectiveEnum: \
1248 CancelKind = Builder.getInt32(Value); \
1250#include "llvm/Frontend/OpenMP/OMPKinds.def"
1267 Builder.SetInsertPoint(UI->getParent());
1268 UI->eraseFromParent();
1281 auto *KernelArgsPtr =
1282 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1287 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1290 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1294 NumThreads, HostPtr, KernelArgsPtr};
1321 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1325 Value *Return =
nullptr;
1345 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1346 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1353 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1355 auto CurFn =
Builder.GetInsertBlock()->getParent();
1362 emitBlock(OffloadContBlock, CurFn,
true);
1367 Value *CancelFlag, omp::Directive CanceledDirective) {
1369 "Unexpected cancellation!");
1389 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1398 Builder.SetInsertPoint(CancellationBlock);
1399 Builder.CreateBr(*FiniBBOrErr);
1402 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1421 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1424 "Expected at least tid and bounded tid as arguments");
1425 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1428 assert(CI &&
"Expected call instruction to outlined function");
1429 CI->
getParent()->setName(
"omp_parallel");
1431 Builder.SetInsertPoint(CI);
1432 Type *PtrTy = OMPIRBuilder->VoidPtr;
1436 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1440 Value *Args = ArgsAlloca;
1444 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1445 Builder.restoreIP(CurrentIP);
1448 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1450 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1452 Builder.CreateStore(V, StoreAddress);
1456 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1457 : Builder.getInt32(1);
1460 Value *Parallel60CallArgs[] = {
1464 NumThreads ? NumThreads : Builder.getInt32(-1),
1465 Builder.getInt32(-1),
1469 Builder.getInt64(NumCapturedVars),
1470 Builder.getInt32(0)};
1478 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1481 Builder.SetInsertPoint(PrivTID);
1483 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1490 I->eraseFromParent();
1513 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1521 F->addMetadata(LLVMContext::MD_callback,
1530 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1533 "Expected at least tid and bounded tid as arguments");
1534 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1537 CI->
getParent()->setName(
"omp_parallel");
1538 Builder.SetInsertPoint(CI);
1541 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1545 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1547 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1554 auto PtrTy = OMPIRBuilder->VoidPtr;
1555 if (IfCondition && NumCapturedVars == 0) {
1563 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1566 Builder.SetInsertPoint(PrivTID);
1568 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1575 I->eraseFromParent();
1583 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1592 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1593 (ProcBind != OMP_PROC_BIND_default);
1600 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1604 if (NumThreads && !
Config.isTargetDevice()) {
1607 Builder.CreateIntCast(NumThreads, Int32,
false)};
1612 if (ProcBind != OMP_PROC_BIND_default) {
1616 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1638 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1641 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1644 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1648 PointerType ::get(
M.getContext(), 0),
1649 "zero.addr.ascast");
1673 if (IP.getBlock()->end() == IP.getPoint()) {
1679 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1680 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1681 "Unexpected insertion point for finalization call!");
1693 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1699 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1717 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1720 assert(BodyGenCB &&
"Expected body generation callback!");
1722 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1725 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1728 if (
Config.isTargetDevice()) {
1731 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1733 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1734 ThreadID, ToBeDeletedVec);
1740 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1742 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1764 ".omp_par", ArgsInZeroAddressSpace);
1769 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1771 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1776 return GV->getValueType() == OpenMPIRBuilder::Ident;
1781 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1787 if (&V == TIDAddr || &V == ZeroAddr) {
1793 for (
Use &U : V.uses())
1795 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1805 if (!V.getType()->isPointerTy()) {
1809 Builder.restoreIP(OuterAllocaIP);
1811 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1815 Builder.SetInsertPoint(InsertBB,
1820 Builder.restoreIP(InnerAllocaIP);
1821 Inner =
Builder.CreateLoad(V.getType(), Ptr);
1824 Value *ReplacementValue =
nullptr;
1827 ReplacementValue = PrivTID;
1830 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
1838 assert(ReplacementValue &&
1839 "Expected copy/create callback to set replacement value!");
1840 if (ReplacementValue == &V)
1845 UPtr->set(ReplacementValue);
1870 for (
Value *Output : Outputs)
1874 "OpenMP outlining should not produce live-out values!");
1876 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1878 for (
auto *BB : Blocks)
1879 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1887 assert(FiniInfo.DK == OMPD_parallel &&
1888 "Unexpected finalization stack state!");
1899 Builder.CreateBr(*FiniBBOrErr);
1903 Term->eraseFromParent();
1909 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1910 UI->eraseFromParent();
1977 if (Dependencies.
empty())
1997 Type *DependInfo = OMPBuilder.DependInfo;
2000 Value *DepArray =
nullptr;
2002 Builder.SetInsertPoint(
2006 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2008 Builder.restoreIP(OldIP);
2010 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2012 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2014 Value *Addr = Builder.CreateStructGEP(
2016 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2017 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
2018 Builder.CreateStore(DepValPtr, Addr);
2021 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2022 Builder.CreateStore(
2023 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
2026 Value *Flags = Builder.CreateStructGEP(
2028 static_cast<unsigned int>(RTLDependInfoFields::Flags));
2029 Builder.CreateStore(
2030 ConstantInt::get(Builder.getInt8Ty(),
2031 static_cast<unsigned int>(Dep.DepKind)),
2038Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2040 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2055 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2059 "omp_taskloop_dup",
M);
2062 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2063 DestTaskArg->
setName(
"dest_task");
2064 SrcTaskArg->
setName(
"src_task");
2065 LastprivateFlagArg->
setName(
"lastprivate_flag");
2067 IRBuilderBase::InsertPointGuard Guard(
Builder);
2071 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2072 Type *TaskWithPrivatesTy =
2075 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2077 PrivatesTy, TaskPrivates,
2082 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2083 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2085 DestTaskContextPtr->
setName(
"destPtr");
2086 SrcTaskContextPtr->
setName(
"srcPtr");
2091 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2092 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2093 if (!AfterIPOrError)
2095 Builder.restoreIP(*AfterIPOrError);
2105 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2107 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2109 Value *TaskContextStructPtrVal) {
2114 uint32_t SrcLocStrSize;
2130 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
2133 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2138 llvm::CanonicalLoopInfo *CLI = result.
get();
2140 OI.
EntryBB = TaskloopAllocaBB;
2141 OI.OuterAllocaBB = AllocaIP.getBlock();
2142 OI.ExitBB = TaskloopExitBB;
2148 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2150 TaskloopAllocaIP,
"lb",
false,
true);
2152 TaskloopAllocaIP,
"ub",
false,
true);
2154 TaskloopAllocaIP,
"step",
false,
true);
2157 OI.Inputs.insert(FakeLB);
2158 OI.Inputs.insert(FakeUB);
2159 OI.Inputs.insert(FakeStep);
2160 if (TaskContextStructPtrVal)
2161 OI.Inputs.insert(TaskContextStructPtrVal);
2162 assert(((TaskContextStructPtrVal && DupCB) ||
2163 (!TaskContextStructPtrVal && !DupCB)) &&
2164 "Task context struct ptr and duplication callback must be both set "
2170 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2174 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2175 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2178 if (!TaskDupFnOrErr) {
2181 Value *TaskDupFn = *TaskDupFnOrErr;
2183 OI.PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2184 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2185 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2186 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2187 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2189 assert(OutlinedFn.hasOneUse() &&
2190 "there must be a single user for the outlined function");
2197 Value *CastedLBVal =
2198 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2199 Value *CastedUBVal =
2200 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2201 Value *CastedStepVal =
2202 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2204 Builder.SetInsertPoint(StaleCI);
2217 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2238 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2240 AllocaInst *ArgStructAlloca =
2242 assert(ArgStructAlloca &&
2243 "Unable to find the alloca instruction corresponding to arguments "
2244 "for extracted function");
2245 std::optional<TypeSize> ArgAllocSize =
2248 "Unable to determine size of arguments for extracted function");
2249 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2254 CallInst *TaskData =
Builder.CreateCall(
2255 TaskAllocFn, {Ident, ThreadID,
Flags,
2256 TaskSize, SharedsSize,
2261 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2262 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2267 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2270 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2273 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2279 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2285 Value *GrainSizeVal =
2286 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2288 Value *TaskDup = TaskDupFn;
2290 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2291 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2296 Builder.CreateCall(TaskloopFn, Args);
2303 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2308 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2310 LoadInst *SharedsOutlined =
2311 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2312 OutlinedFn.getArg(1)->replaceUsesWithIf(
2314 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2317 Type *IVTy =
IV->getType();
2323 Value *TaskLB =
nullptr;
2324 Value *TaskUB =
nullptr;
2325 Value *LoadTaskLB =
nullptr;
2326 Value *LoadTaskUB =
nullptr;
2327 for (Instruction &
I : *TaskloopAllocaBB) {
2328 if (
I.getOpcode() == Instruction::GetElementPtr) {
2331 switch (CI->getZExtValue()) {
2340 }
else if (
I.getOpcode() == Instruction::Load) {
2342 if (
Load.getPointerOperand() == TaskLB) {
2343 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2345 }
else if (
Load.getPointerOperand() == TaskUB) {
2346 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2352 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2354 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2355 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2356 Value *TripCountMinusOne =
2357 Builder.CreateSDiv(
Builder.CreateSub(LoadTaskUB, LoadTaskLB), FakeStep);
2358 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2359 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2360 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2362 CLI->setTripCount(CastedTripCount);
2364 Builder.SetInsertPoint(CLI->getBody(),
2365 CLI->getBody()->getFirstInsertionPt());
2367 if (NumOfCollapseLoops > 1) {
2373 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2376 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2377 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2378 User *IVUser = IVUse->getUser();
2380 if (
Op->getOpcode() == Instruction::URem ||
2381 Op->getOpcode() == Instruction::UDiv) {
2386 for (User *User : UsersToReplace) {
2387 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2404 assert(CLI->getIndVar()->getNumUses() == 3 &&
2405 "Canonical loop should have exactly three uses of the ind var");
2406 for (User *IVUser : CLI->getIndVar()->users()) {
2408 if (
Mul->getOpcode() == Instruction::Mul) {
2409 for (User *MulUser :
Mul->users()) {
2411 if (
Add->getOpcode() == Instruction::Add) {
2412 Add->setOperand(1, CastedTaskLB);
2421 FakeLB->replaceAllUsesWith(CastedLBVal);
2422 FakeUB->replaceAllUsesWith(CastedUBVal);
2423 FakeStep->replaceAllUsesWith(CastedStepVal);
2425 I->eraseFromParent();
2430 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2436 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2445 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2477 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
2488 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2490 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2491 Affinities, Mergeable, Priority, EventHandle,
2492 TaskAllocaBB, ToBeDeleted](
Function &OutlinedFn)
mutable {
2494 assert(OutlinedFn.hasOneUse() &&
2495 "there must be a single user for the outlined function");
2500 bool HasShareds = StaleCI->
arg_size() > 1;
2501 Builder.SetInsertPoint(StaleCI);
2526 Flags =
Builder.CreateOr(FinalFlag, Flags);
2539 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2548 assert(ArgStructAlloca &&
2549 "Unable to find the alloca instruction corresponding to arguments "
2550 "for extracted function");
2551 std::optional<TypeSize> ArgAllocSize =
2554 "Unable to determine size of arguments for extracted function");
2555 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2561 TaskAllocFn, {Ident, ThreadID, Flags,
2562 TaskSize, SharedsSize,
2565 if (Affinities.
Count && Affinities.
Info) {
2567 OMPRTL___kmpc_omp_reg_task_with_affinity);
2578 OMPRTL___kmpc_task_allow_completion_event);
2582 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2584 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2585 Builder.CreateStore(EventVal, EventHandleAddr);
2591 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2592 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2610 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2613 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2615 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2618 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2619 PriorityData, {Zero, Zero});
2620 Builder.CreateStore(Priority, CmplrData);
2647 Builder.GetInsertPoint()->getParent()->getTerminator();
2648 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2649 Builder.SetInsertPoint(IfTerminator);
2652 Builder.SetInsertPoint(ElseTI);
2654 if (Dependencies.
size()) {
2659 {Ident, ThreadID,
Builder.getInt32(Dependencies.
size()), DepArray,
2660 ConstantInt::get(
Builder.getInt32Ty(), 0),
2675 Builder.SetInsertPoint(ThenTI);
2678 if (Dependencies.
size()) {
2683 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
2684 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
2695 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2697 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2698 OutlinedFn.getArg(1)->replaceUsesWithIf(
2699 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2703 I->eraseFromParent();
2707 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2733 Builder.SetInsertPoint(TaskgroupExitBB);
2776 unsigned CaseNumber = 0;
2777 for (
auto SectionCB : SectionCBs) {
2779 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2781 Builder.SetInsertPoint(CaseBB);
2784 CaseEndBr->getIterator()}))
2795 Value *LB = ConstantInt::get(I32Ty, 0);
2796 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2797 Value *ST = ConstantInt::get(I32Ty, 1);
2799 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2804 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2805 WorksharingLoopType::ForStaticLoop, !IsNowait);
2811 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2815 assert(FiniInfo.DK == OMPD_sections &&
2816 "Unexpected finalization stack state!");
2817 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
2831 if (IP.getBlock()->end() != IP.getPoint())
2842 auto *CaseBB =
Loc.IP.getBlock();
2843 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2844 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2850 Directive OMPD = Directive::OMPD_sections;
2853 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2864Value *OpenMPIRBuilder::getGPUThreadID() {
2867 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2871Value *OpenMPIRBuilder::getGPUWarpSize() {
2876Value *OpenMPIRBuilder::getNVPTXWarpID() {
2877 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2878 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2881Value *OpenMPIRBuilder::getNVPTXLaneID() {
2882 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2883 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2884 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2885 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
2892 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
2893 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
2894 assert(FromSize > 0 &&
"From size must be greater than zero");
2895 assert(ToSize > 0 &&
"To size must be greater than zero");
2896 if (FromType == ToType)
2898 if (FromSize == ToSize)
2899 return Builder.CreateBitCast(From, ToType);
2901 return Builder.CreateIntCast(From, ToType,
true);
2907 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2908 CastItem,
Builder.getPtrTy(0));
2909 Builder.CreateStore(From, ValCastItem);
2910 return Builder.CreateLoad(ToType, CastItem);
2917 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
2918 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2922 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2924 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
2926 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2927 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2928 Value *WarpSizeCast =
2930 Value *ShuffleCall =
2932 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2939 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
2951 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
2952 Value *ElemPtr = DstAddr;
2953 Value *Ptr = SrcAddr;
2954 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2958 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2961 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2962 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2966 if ((
Size / IntSize) > 1) {
2967 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2968 SrcAddrGEP,
Builder.getPtrTy());
2985 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
2987 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
2990 Value *Res = createRuntimeShuffleFunction(
2993 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
2995 Builder.CreateAlignedStore(Res, ElemPtr,
2996 M.getDataLayout().getPrefTypeAlign(ElemType));
2998 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2999 Value *LocalElemPtr =
3000 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3006 Value *Res = createRuntimeShuffleFunction(
3007 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3010 Res =
Builder.CreateTrunc(Res, ElemType);
3011 Builder.CreateStore(Res, ElemPtr);
3012 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3014 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3020Error OpenMPIRBuilder::emitReductionListCopy(
3025 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3026 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3030 for (
auto En :
enumerate(ReductionInfos)) {
3032 Value *SrcElementAddr =
nullptr;
3033 AllocaInst *DestAlloca =
nullptr;
3034 Value *DestElementAddr =
nullptr;
3035 Value *DestElementPtrAddr =
nullptr;
3037 bool ShuffleInElement =
false;
3040 bool UpdateDestListPtr =
false;
3044 ReductionArrayTy, SrcBase,
3045 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3046 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3050 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3051 ReductionArrayTy, DestBase,
3052 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3053 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
3059 Type *DestAllocaType =
3060 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3061 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3062 ".omp.reduction.element");
3064 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3065 DestElementAddr = DestAlloca;
3068 DestElementAddr->
getName() +
".ascast");
3070 ShuffleInElement =
true;
3071 UpdateDestListPtr =
true;
3083 if (ShuffleInElement) {
3084 Type *ShuffleType = RI.ElementType;
3085 Value *ShuffleSrcAddr = SrcElementAddr;
3086 Value *ShuffleDestAddr = DestElementAddr;
3087 AllocaInst *LocalStorage =
nullptr;
3090 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3091 assert(RI.ByRefAllocatedType &&
3092 "Expected by-ref allocated type to be set");
3097 ShuffleType = RI.ByRefElementType;
3100 RI.DataPtrPtrGen(
Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3103 return GenResult.takeError();
3105 ShuffleSrcAddr =
Builder.CreateLoad(
Builder.getPtrTy(), ShuffleSrcAddr);
3111 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3113 ShuffleDestAddr = LocalStorage;
3117 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3118 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3122 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3123 DestAlloca,
Builder.getPtrTy(),
".ascast");
3126 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3127 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3130 return GenResult.takeError();
3133 switch (RI.EvaluationKind) {
3135 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3137 Builder.CreateStore(Elem, DestElementAddr);
3141 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3142 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3144 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3146 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3148 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3150 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3151 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3152 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3153 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3154 Builder.CreateStore(SrcReal, DestRealPtr);
3155 Builder.CreateStore(SrcImg, DestImgPtr);
3160 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3162 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3163 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3175 if (UpdateDestListPtr) {
3176 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3177 DestElementAddr,
Builder.getPtrTy(),
3178 DestElementAddr->
getName() +
".ascast");
3179 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3186Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3190 LLVMContext &Ctx =
M.getContext();
3192 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3196 "_omp_reduction_inter_warp_copy_func", &
M);
3201 Builder.SetInsertPoint(EntryBB);
3218 StringRef TransferMediumName =
3219 "__openmp_nvptx_data_transfer_temporary_storage";
3220 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3221 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3223 if (!TransferMedium) {
3224 TransferMedium =
new GlobalVariable(
3232 Value *GPUThreadID = getGPUThreadID();
3234 Value *LaneID = getNVPTXLaneID();
3236 Value *WarpID = getNVPTXWarpID();
3240 Builder.GetInsertBlock()->getFirstInsertionPt());
3244 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3245 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3246 AllocaInst *NumWarpsAlloca =
3247 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3248 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3249 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3250 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3251 NumWarpsAlloca,
Builder.getPtrTy(0),
3252 NumWarpsAlloca->
getName() +
".ascast");
3253 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3254 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3263 for (
auto En :
enumerate(ReductionInfos)) {
3269 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3270 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3271 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3272 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3275 unsigned NumIters = RealTySize / TySize;
3278 Value *Cnt =
nullptr;
3279 Value *CntAddr =
nullptr;
3286 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3288 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3289 CntAddr->
getName() +
".ascast");
3301 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3302 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3309 omp::Directive::OMPD_unknown,
3313 return BarrierIP1.takeError();
3319 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3320 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3324 auto *RedListArrayTy =
3327 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3329 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3330 {ConstantInt::get(IndexTy, 0),
3331 ConstantInt::get(IndexTy, En.index())});
3337 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3340 return GenRes.takeError();
3351 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3356 Builder.CreateStore(Elem, MediumPtr,
3368 omp::Directive::OMPD_unknown,
3372 return BarrierIP2.takeError();
3379 Value *NumWarpsVal =
3382 Value *IsActiveThread =
3383 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3384 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3391 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3393 Value *TargetElemPtrPtr =
3394 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3395 {ConstantInt::get(IndexTy, 0),
3396 ConstantInt::get(IndexTy, En.index())});
3397 Value *TargetElemPtrVal =
3399 Value *TargetElemPtr = TargetElemPtrVal;
3403 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3406 return GenRes.takeError();
3408 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
3416 Value *SrcMediumValue =
3417 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3418 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3428 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3429 Builder.CreateStore(Cnt, CntAddr,
false);
3431 auto *CurFn =
Builder.GetInsertBlock()->getParent();
3435 RealTySize %= TySize;
3445Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3448 LLVMContext &Ctx =
M.getContext();
3449 FunctionType *FuncTy =
3451 {Builder.getPtrTy(), Builder.getInt16Ty(),
3452 Builder.getInt16Ty(), Builder.getInt16Ty()},
3456 "_omp_reduction_shuffle_and_reduce_func", &
M);
3466 Builder.SetInsertPoint(EntryBB);
3477 Type *ReduceListArgType = ReduceListArg->
getType();
3481 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3482 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3483 LaneIDArg->
getName() +
".addr");
3485 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3486 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3487 AlgoVerArg->
getName() +
".addr");
3494 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3496 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3497 ReduceListAlloca, ReduceListArgType,
3498 ReduceListAlloca->
getName() +
".ascast");
3499 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3500 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3501 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3502 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3503 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3504 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3505 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3506 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3507 RemoteReductionListAlloca,
Builder.getPtrTy(),
3508 RemoteReductionListAlloca->
getName() +
".ascast");
3510 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3511 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3512 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3513 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3515 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3516 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3517 Value *RemoteLaneOffset =
3518 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3519 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3526 Error EmitRedLsCpRes = emitReductionListCopy(
3528 ReduceList, RemoteListAddrCast, IsByRef,
3529 {RemoteLaneOffset,
nullptr,
nullptr});
3532 return EmitRedLsCpRes;
3557 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3562 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3563 Value *RemoteOffsetComp =
3565 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3566 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3567 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3573 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3575 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3576 ReduceList,
Builder.getPtrTy());
3577 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3578 RemoteListAddrCast,
Builder.getPtrTy());
3580 ->addFnAttr(Attribute::NoUnwind);
3591 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3592 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3597 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3601 EmitRedLsCpRes = emitReductionListCopy(
3603 RemoteListAddrCast, ReduceList, IsByRef);
3606 return EmitRedLsCpRes;
3621OpenMPIRBuilder::generateReductionDescriptor(
3623 Type *DescriptorType,
3629 Value *DescriptorSize =
3630 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3632 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3633 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3637 Value *DataPtrField;
3639 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3642 return GenResult.takeError();
3645 DataPtr,
Builder.getPtrTy(),
".ascast"),
3651Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3655 LLVMContext &Ctx =
M.getContext();
3658 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3662 "_omp_reduction_list_to_global_copy_func", &
M);
3669 Builder.SetInsertPoint(EntryBlock);
3679 BufferArg->
getName() +
".addr");
3683 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3684 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3685 BufferArgAlloca,
Builder.getPtrTy(),
3686 BufferArgAlloca->
getName() +
".ascast");
3687 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3688 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3689 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3690 ReduceListArgAlloca,
Builder.getPtrTy(),
3691 ReduceListArgAlloca->
getName() +
".ascast");
3693 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3694 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3695 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3697 Value *LocalReduceList =
3699 Value *BufferArgVal =
3703 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3704 for (
auto En :
enumerate(ReductionInfos)) {
3706 auto *RedListArrayTy =
3710 RedListArrayTy, LocalReduceList,
3711 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3717 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3719 ReductionsBufferTy, BufferVD, 0, En.index());
3721 switch (RI.EvaluationKind) {
3723 Value *TargetElement;
3725 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3726 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
3729 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3732 return GenResult.takeError();
3735 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3738 Builder.CreateStore(TargetElement, GlobVal);
3742 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3743 RI.ElementType, ElemPtr, 0, 0,
".realp");
3745 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3747 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3749 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3751 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3752 RI.ElementType, GlobVal, 0, 0,
".realp");
3753 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3754 RI.ElementType, GlobVal, 0, 1,
".imagp");
3755 Builder.CreateStore(SrcReal, DestRealPtr);
3756 Builder.CreateStore(SrcImg, DestImgPtr);
3761 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
3763 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3764 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3775Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
3779 LLVMContext &Ctx =
M.getContext();
3782 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3786 "_omp_reduction_list_to_global_reduce_func", &
M);
3793 Builder.SetInsertPoint(EntryBlock);
3803 BufferArg->
getName() +
".addr");
3807 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3808 auto *RedListArrayTy =
3813 Value *LocalReduceList =
3814 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3818 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3819 BufferArgAlloca,
Builder.getPtrTy(),
3820 BufferArgAlloca->
getName() +
".ascast");
3821 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3822 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3823 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3824 ReduceListArgAlloca,
Builder.getPtrTy(),
3825 ReduceListArgAlloca->
getName() +
".ascast");
3826 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3827 LocalReduceList,
Builder.getPtrTy(),
3828 LocalReduceList->
getName() +
".ascast");
3830 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3831 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3832 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3837 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3838 for (
auto En :
enumerate(ReductionInfos)) {
3842 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3846 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
3847 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3848 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3853 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
3854 RedListArrayTy, LocalReduceListAddrCast,
3855 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3857 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3859 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3860 ReductionsBufferTy, BufferVD, 0, En.index());
3862 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3866 Value *SrcElementPtrPtr =
3867 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3868 {ConstantInt::get(IndexTy, 0),
3869 ConstantInt::get(IndexTy, En.index())});
3870 Value *SrcDescriptorAddr =
3875 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
3876 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3879 return GenResult.takeError();
3881 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
3883 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3891 ->addFnAttr(Attribute::NoUnwind);
3897Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
3901 LLVMContext &Ctx =
M.getContext();
3904 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3908 "_omp_reduction_global_to_list_copy_func", &
M);
3915 Builder.SetInsertPoint(EntryBlock);
3925 BufferArg->
getName() +
".addr");
3929 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3930 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3931 BufferArgAlloca,
Builder.getPtrTy(),
3932 BufferArgAlloca->
getName() +
".ascast");
3933 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3934 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3935 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3936 ReduceListArgAlloca,
Builder.getPtrTy(),
3937 ReduceListArgAlloca->
getName() +
".ascast");
3938 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3939 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3940 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3942 Value *LocalReduceList =
3947 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3948 for (
auto En :
enumerate(ReductionInfos)) {
3949 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3950 auto *RedListArrayTy =
3954 RedListArrayTy, LocalReduceList,
3955 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3960 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3961 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3962 ReductionsBufferTy, BufferVD, 0, En.index());
3968 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3974 return GenResult.takeError();
3979 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
3980 Builder.CreateStore(TargetElement, ElemPtr);
3984 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3993 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3995 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3997 Builder.CreateStore(SrcReal, DestRealPtr);
3998 Builder.CreateStore(SrcImg, DestImgPtr);
4005 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4006 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4018Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4022 LLVMContext &Ctx =
M.getContext();
4025 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4029 "_omp_reduction_global_to_list_reduce_func", &
M);
4036 Builder.SetInsertPoint(EntryBlock);
4046 BufferArg->
getName() +
".addr");
4050 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4056 Value *LocalReduceList =
4057 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4061 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4062 BufferArgAlloca,
Builder.getPtrTy(),
4063 BufferArgAlloca->
getName() +
".ascast");
4064 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4065 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4066 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4067 ReduceListArgAlloca,
Builder.getPtrTy(),
4068 ReduceListArgAlloca->
getName() +
".ascast");
4069 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4070 LocalReduceList,
Builder.getPtrTy(),
4071 LocalReduceList->
getName() +
".ascast");
4073 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4074 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4075 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4080 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4081 for (
auto En :
enumerate(ReductionInfos)) {
4085 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4089 ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4090 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4091 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4096 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4097 RedListArrayTy, ReductionList,
4098 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4101 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4102 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4103 ReductionsBufferTy, BufferVD, 0, En.index());
4105 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4107 Value *ReduceListVal =
4109 Value *SrcElementPtrPtr =
4110 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4111 {ConstantInt::get(IndexTy, 0),
4112 ConstantInt::get(IndexTy, En.index())});
4113 Value *SrcDescriptorAddr =
4118 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4119 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4121 return GenResult.takeError();
4123 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4125 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4133 ->addFnAttr(Attribute::NoUnwind);
4139std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4140 std::string Suffix =
4142 return (Name + Suffix).str();
4145Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4148 AttributeList FuncAttrs) {
4150 {Builder.getPtrTy(), Builder.getPtrTy()},
4152 std::string
Name = getReductionFuncName(ReducerName);
4160 Builder.SetInsertPoint(EntryBB);
4164 Value *LHSArrayPtr =
nullptr;
4165 Value *RHSArrayPtr =
nullptr;
4172 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4174 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4175 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4176 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4177 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4178 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4179 Builder.CreateStore(Arg0, LHSAddrCast);
4180 Builder.CreateStore(Arg1, RHSAddrCast);
4181 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4182 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4186 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4188 for (
auto En :
enumerate(ReductionInfos)) {
4191 RedArrayTy, RHSArrayPtr,
4192 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4194 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4195 RHSI8Ptr, RI.PrivateVariable->getType(),
4196 RHSI8Ptr->
getName() +
".ascast");
4199 RedArrayTy, LHSArrayPtr,
4200 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4202 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4203 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4212 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4213 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4214 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4221 return AfterIP.takeError();
4222 if (!
Builder.GetInsertBlock())
4223 return ReductionFunc;
4227 if (!IsByRef.
empty() && !IsByRef[En.index()])
4228 Builder.CreateStore(Reduced, LHSPtr);
4233 for (
auto En :
enumerate(ReductionInfos)) {
4234 unsigned Index = En.index();
4236 Value *LHSFixupPtr, *RHSFixupPtr;
4237 Builder.restoreIP(RI.ReductionGenClang(
4238 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4243 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4248 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4262 return ReductionFunc;
4270 assert(RI.Variable &&
"expected non-null variable");
4271 assert(RI.PrivateVariable &&
"expected non-null private variable");
4272 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4273 "expected non-null reduction generator callback");
4276 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4277 "expected variables and their private equivalents to have the same "
4280 assert(RI.Variable->getType()->isPointerTy() &&
4281 "expected variables to be pointers");
4290 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4304 if (ReductionInfos.
size() == 0)
4314 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4318 AttributeList FuncAttrs;
4319 AttrBuilder AttrBldr(Ctx);
4321 AttrBldr.addAttribute(Attr);
4322 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4323 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4327 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4329 if (!ReductionResult)
4331 Function *ReductionFunc = *ReductionResult;
4335 if (GridValue.has_value())
4336 Config.setGridValue(GridValue.value());
4351 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4355 Value *ReductionListAlloca =
4356 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4357 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4358 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4361 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4362 for (
auto En :
enumerate(ReductionInfos)) {
4365 RedArrayTy, ReductionList,
4366 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4369 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4374 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4375 Builder.CreateStore(CastElem, ElemPtr);
4379 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4385 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4391 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4393 unsigned MaxDataSize = 0;
4395 for (
auto En :
enumerate(ReductionInfos)) {
4396 auto Size =
M.getDataLayout().getTypeStoreSize(En.value().ElementType);
4397 if (
Size > MaxDataSize)
4399 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4400 ? En.value().ByRefElementType
4401 : En.value().ElementType;
4404 Value *ReductionDataSize =
4405 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4406 if (!IsTeamsReduction) {
4407 Value *SarFuncCast =
4408 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4410 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4411 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4414 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4419 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4421 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4424 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4429 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4434 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4439 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4446 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4448 Value *Args3[] = {SrcLocInfo,
4449 KernelTeamsReductionPtr,
4450 Builder.getInt32(ReductionBufNum),
4461 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4478 for (
auto En :
enumerate(ReductionInfos)) {
4486 Value *LHSPtr, *RHSPtr;
4488 &LHSPtr, &RHSPtr, CurFunc));
4501 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4503 "red.value." +
Twine(En.index()));
4514 if (!IsByRef.
empty() && !IsByRef[En.index()])
4519 if (ContinuationBlock) {
4520 Builder.CreateBr(ContinuationBlock);
4521 Builder.SetInsertPoint(ContinuationBlock);
4523 Config.setEmitLLVMUsed();
4534 ".omp.reduction.func", &M);
4544 Builder.SetInsertPoint(ReductionFuncBlock);
4545 Value *LHSArrayPtr =
nullptr;
4546 Value *RHSArrayPtr =
nullptr;
4557 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4559 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4560 Value *LHSAddrCast =
4561 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4562 Value *RHSAddrCast =
4563 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4564 Builder.CreateStore(Arg0, LHSAddrCast);
4565 Builder.CreateStore(Arg1, RHSAddrCast);
4566 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4567 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4569 LHSArrayPtr = ReductionFunc->
getArg(0);
4570 RHSArrayPtr = ReductionFunc->
getArg(1);
4573 unsigned NumReductions = ReductionInfos.
size();
4576 for (
auto En :
enumerate(ReductionInfos)) {
4578 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4579 RedArrayTy, LHSArrayPtr, 0, En.index());
4580 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4581 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4584 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4585 RedArrayTy, RHSArrayPtr, 0, En.index());
4586 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4587 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4596 Builder.restoreIP(*AfterIP);
4598 if (!Builder.GetInsertBlock())
4602 if (!IsByRef[En.index()])
4603 Builder.CreateStore(Reduced, LHSPtr);
4605 Builder.CreateRetVoid();
4612 bool IsNoWait,
bool IsTeamsReduction) {
4616 IsByRef, IsNoWait, IsTeamsReduction);
4623 if (ReductionInfos.
size() == 0)
4633 unsigned NumReductions = ReductionInfos.
size();
4636 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4638 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4640 for (
auto En :
enumerate(ReductionInfos)) {
4641 unsigned Index = En.index();
4643 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4644 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4651 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4661 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4666 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4667 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4669 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4671 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4672 : RuntimeFunction::OMPRTL___kmpc_reduce);
4675 {Ident, ThreadId, NumVariables, RedArraySize,
4676 RedArray, ReductionFunc, Lock},
4687 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4688 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4689 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4694 Builder.SetInsertPoint(NonAtomicRedBlock);
4695 for (
auto En :
enumerate(ReductionInfos)) {
4701 if (!IsByRef[En.index()]) {
4703 "red.value." +
Twine(En.index()));
4705 Value *PrivateRedValue =
4707 "red.private.value." +
Twine(En.index()));
4715 if (!
Builder.GetInsertBlock())
4718 if (!IsByRef[En.index()])
4722 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4723 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4725 Builder.CreateBr(ContinuationBlock);
4730 Builder.SetInsertPoint(AtomicRedBlock);
4731 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4738 if (!
Builder.GetInsertBlock())
4741 Builder.CreateBr(ContinuationBlock);
4754 if (!
Builder.GetInsertBlock())
4757 Builder.SetInsertPoint(ContinuationBlock);
4768 Directive OMPD = Directive::OMPD_master;
4773 Value *Args[] = {Ident, ThreadId};
4781 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4792 Directive OMPD = Directive::OMPD_masked;
4798 Value *ArgsEnd[] = {Ident, ThreadId};
4806 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4816 Call->setDoesNotThrow();
4831 bool IsInclusive,
ScanInfo *ScanRedInfo) {
4833 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4834 ScanVarsType, ScanRedInfo);
4845 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4848 Type *DestTy = ScanVarsType[i];
4849 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4852 Builder.CreateStore(Src, Val);
4857 Builder.GetInsertBlock()->getParent());
4860 IV = ScanRedInfo->
IV;
4863 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4866 Type *DestTy = ScanVarsType[i];
4868 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4870 Builder.CreateStore(Src, ScanVars[i]);
4884 Builder.GetInsertBlock()->getParent());
4889Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4893 Builder.restoreIP(AllocaIP);
4895 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4897 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4904 Builder.restoreIP(CodeGenIP);
4906 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
4907 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4911 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4912 AllocSpan,
nullptr,
"arr");
4913 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
4931 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
4940Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4946 Value *PrivateVar = RedInfo.PrivateVariable;
4947 Value *OrigVar = RedInfo.Variable;
4951 Type *SrcTy = RedInfo.ElementType;
4956 Builder.CreateStore(Src, OrigVar);
4979 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5004 Builder.GetInsertBlock()->getModule(),
5011 Builder.GetInsertBlock()->getModule(),
5017 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5018 Builder.SetInsertPoint(InputBB);
5021 Builder.SetInsertPoint(LoopBB);
5037 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5039 Builder.SetInsertPoint(InnerLoopBB);
5043 Value *ReductionVal = RedInfo.PrivateVariable;
5046 Type *DestTy = RedInfo.ElementType;
5049 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5052 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5057 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5060 Builder.CreateStore(Result, LHSPtr);
5063 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5065 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5066 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5069 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5075 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5096 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5103Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5115 Error Err = InputLoopGen();
5126 Error Err = ScanLoopGen(Builder.saveIP());
5133void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5170 Builder.SetInsertPoint(Preheader);
5173 Builder.SetInsertPoint(Header);
5174 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5175 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5180 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5181 Builder.CreateCondBr(Cmp, Body, Exit);
5186 Builder.SetInsertPoint(Latch);
5188 "omp_" + Name +
".next",
true);
5199 CL->Header = Header;
5218 NextBB, NextBB, Name);
5250 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5259 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5260 ScanRedInfo->
Span = TripCount;
5266 ScanRedInfo->
IV =
IV;
5267 createScanBBs(ScanRedInfo);
5270 assert(Terminator->getNumSuccessors() == 1);
5271 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5274 Builder.GetInsertBlock()->getParent());
5277 Builder.GetInsertBlock()->getParent());
5278 Builder.CreateBr(ContinueBlock);
5284 const auto &&InputLoopGen = [&]() ->
Error {
5286 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5287 ComputeIP, Name,
true, ScanRedInfo);
5291 Builder.restoreIP((*LoopInfo)->getAfterIP());
5297 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5301 Builder.restoreIP((*LoopInfo)->getAfterIP());
5305 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5313 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5323 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5324 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5328 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5344 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5347 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5351 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5356 Value *CountIfLooping;
5357 if (InclusiveStop) {
5358 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5364 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5367 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5368 "omp_" + Name +
".tripcount");
5373 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5380 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5387 ScanRedInfo->
IV = IndVar;
5388 return BodyGenCB(
Builder.saveIP(), IndVar);
5394 Builder.getCurrentDebugLocation());
5405 unsigned Bitwidth = Ty->getIntegerBitWidth();
5408 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5411 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5421 unsigned Bitwidth = Ty->getIntegerBitWidth();
5424 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5427 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5435 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5437 "Require dedicated allocate IP");
5443 uint32_t SrcLocStrSize;
5449 Type *IVTy =
IV->getType();
5450 FunctionCallee StaticInit =
5451 LoopType == WorksharingLoopType::DistributeForStaticLoop
5454 FunctionCallee StaticFini =
5458 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5461 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5462 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5463 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5464 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5473 Constant *One = ConstantInt::get(IVTy, 1);
5474 Builder.CreateStore(Zero, PLowerBound);
5476 Builder.CreateStore(UpperBound, PUpperBound);
5477 Builder.CreateStore(One, PStride);
5482 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5483 ? OMPScheduleType::OrderedDistribute
5486 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5490 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5491 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5494 PLowerBound, PUpperBound});
5495 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5496 Value *PDistUpperBound =
5497 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5498 Args.push_back(PDistUpperBound);
5503 BuildInitCall(SchedulingType,
Builder);
5504 if (HasDistSchedule &&
5505 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5506 Constant *DistScheduleSchedType = ConstantInt::get(
5511 BuildInitCall(DistScheduleSchedType,
Builder);
5513 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5514 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5515 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5516 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5517 CLI->setTripCount(TripCount);
5523 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5527 return Builder.CreateAdd(OldIV, LowerBound);
5539 omp::Directive::OMPD_for,
false,
5542 return BarrierIP.takeError();
5569 Reachable.insert(
Block);
5579 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5583OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5587 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5588 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5593 Type *IVTy =
IV->getType();
5595 "Max supported tripcount bitwidth is 64 bits");
5597 :
Type::getInt64Ty(Ctx);
5600 Constant *One = ConstantInt::get(InternalIVTy, 1);
5606 for (BasicBlock &BB : *
F)
5607 if (!BB.hasTerminator())
5608 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5613 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5614 for (Instruction *
I : UIs)
5615 I->eraseFromParent();
5618 if (ChunkSize || DistScheduleChunkSize)
5623 FunctionCallee StaticInit =
5625 FunctionCallee StaticFini =
5631 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5632 Value *PLowerBound =
5633 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5634 Value *PUpperBound =
5635 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5636 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5645 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5646 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5647 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5648 "distschedulechunksize");
5649 Value *CastedTripCount =
5650 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5653 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5655 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5656 Builder.CreateStore(Zero, PLowerBound);
5657 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5658 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5660 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5661 Builder.CreateStore(UpperBound, PUpperBound);
5662 Builder.CreateStore(One, PStride);
5666 uint32_t SrcLocStrSize;
5670 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5671 PUpperBound, PStride, One,
5672 this](
Value *SchedulingType,
Value *ChunkSize,
5675 StaticInit, {SrcLoc, ThreadNum,
5676 SchedulingType, PLastIter,
5677 PLowerBound, PUpperBound,
5681 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5682 if (DistScheduleSchedType != OMPScheduleType::None &&
5683 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5684 SchedType != OMPScheduleType::OrderedDistribute) {
5688 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5692 Value *FirstChunkStart =
5693 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5694 Value *FirstChunkStop =
5695 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5696 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5698 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5699 Value *NextChunkStride =
5700 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5704 Value *DispatchCounter;
5712 DispatchCounter = Counter;
5715 FirstChunkStart, CastedTripCount, NextChunkStride,
5738 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
5739 Value *IsLastChunk =
5740 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
5741 Value *CountUntilOrigTripCount =
5742 Builder.CreateSub(CastedTripCount, DispatchCounter);
5744 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
5745 Value *BackcastedChunkTC =
5746 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
5747 CLI->setTripCount(BackcastedChunkTC);
5752 Value *BackcastedDispatchCounter =
5753 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
5754 CLI->mapIndVar([&](Instruction *) ->
Value * {
5756 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
5769 return AfterIP.takeError();
5784static FunctionCallee
5787 unsigned Bitwidth = Ty->getIntegerBitWidth();
5790 case WorksharingLoopType::ForStaticLoop:
5793 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5796 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5798 case WorksharingLoopType::DistributeStaticLoop:
5801 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5804 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5806 case WorksharingLoopType::DistributeForStaticLoop:
5809 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5812 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
5815 if (Bitwidth != 32 && Bitwidth != 64) {
5827 Function &LoopBodyFn,
bool NoLoop) {
5838 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5839 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5840 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5841 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5846 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5847 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5851 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5852 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5853 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5854 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5855 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5857 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5881 Builder.restoreIP({Preheader, Preheader->
end()});
5884 Builder.CreateBr(CLI->
getExit());
5892 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5900 "Expected unique undroppable user of outlined function");
5902 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5904 "Expected outlined function call to be located in loop preheader");
5906 if (OutlinedFnCallInstruction->
arg_size() > 1)
5913 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5915 for (
auto &ToBeDeletedItem : ToBeDeleted)
5916 ToBeDeletedItem->eraseFromParent();
5923 uint32_t SrcLocStrSize;
5932 SmallVector<Instruction *, 4> ToBeDeleted;
5934 OI.OuterAllocaBB = AllocaIP.getBlock();
5957 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
5959 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5961 CodeExtractorAnalysisCache CEAC(*OuterFn);
5962 CodeExtractor Extractor(Blocks,
5975 SetVector<Value *> SinkingCands, HoistingCands;
5979 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
5986 for (
auto Use :
Users) {
5988 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5989 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
5995 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6002 OI.PostOutlineCB = [=, ToBeDeletedVec =
6003 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6013 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6014 bool HasSimdModifier,
bool HasMonotonicModifier,
6015 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6017 Value *DistScheduleChunkSize) {
6018 if (
Config.isTargetDevice())
6019 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6021 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6022 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6024 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6025 OMPScheduleType::ModifierOrdered;
6027 if (HasDistSchedule) {
6028 DistScheduleSchedType = DistScheduleChunkSize
6029 ? OMPScheduleType::OrderedDistributeChunked
6030 : OMPScheduleType::OrderedDistribute;
6032 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6033 case OMPScheduleType::BaseStatic:
6034 case OMPScheduleType::BaseDistribute:
6035 assert((!ChunkSize || !DistScheduleChunkSize) &&
6036 "No chunk size with static-chunked schedule");
6037 if (IsOrdered && !HasDistSchedule)
6038 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6039 NeedsBarrier, ChunkSize);
6041 if (DistScheduleChunkSize)
6042 return applyStaticChunkedWorkshareLoop(
6043 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6044 DistScheduleChunkSize, DistScheduleSchedType);
6045 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6048 case OMPScheduleType::BaseStaticChunked:
6049 case OMPScheduleType::BaseDistributeChunked:
6050 if (IsOrdered && !HasDistSchedule)
6051 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6052 NeedsBarrier, ChunkSize);
6054 return applyStaticChunkedWorkshareLoop(
6055 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6056 DistScheduleChunkSize, DistScheduleSchedType);
6058 case OMPScheduleType::BaseRuntime:
6059 case OMPScheduleType::BaseAuto:
6060 case OMPScheduleType::BaseGreedy:
6061 case OMPScheduleType::BaseBalanced:
6062 case OMPScheduleType::BaseSteal:
6063 case OMPScheduleType::BaseRuntimeSimd:
6065 "schedule type does not support user-defined chunk sizes");
6067 case OMPScheduleType::BaseGuidedSimd:
6068 case OMPScheduleType::BaseDynamicChunked:
6069 case OMPScheduleType::BaseGuidedChunked:
6070 case OMPScheduleType::BaseGuidedIterativeChunked:
6071 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6072 case OMPScheduleType::BaseStaticBalancedChunked:
6073 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6074 NeedsBarrier, ChunkSize);
6087 unsigned Bitwidth = Ty->getIntegerBitWidth();
6090 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6093 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6101static FunctionCallee
6103 unsigned Bitwidth = Ty->getIntegerBitWidth();
6106 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6109 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6116static FunctionCallee
6118 unsigned Bitwidth = Ty->getIntegerBitWidth();
6121 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6124 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6129OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6132 bool NeedsBarrier,
Value *Chunk) {
6133 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6135 "Require dedicated allocate IP");
6137 "Require valid schedule type");
6139 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6140 OMPScheduleType::ModifierOrdered;
6145 uint32_t SrcLocStrSize;
6151 Type *IVTy =
IV->getType();
6156 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6158 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6159 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6160 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6161 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6170 Constant *One = ConstantInt::get(IVTy, 1);
6171 Builder.CreateStore(One, PLowerBound);
6173 Builder.CreateStore(UpperBound, PUpperBound);
6174 Builder.CreateStore(One, PStride);
6191 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6203 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6206 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6207 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6210 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6211 Builder.CreateCondBr(MoreWork, Header, Exit);
6217 PI->setIncomingBlock(0, OuterCond);
6218 PI->setIncomingValue(0, LowerBound);
6223 Br->setSuccessor(OuterCond);
6229 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6232 CI->setOperand(1, UpperBound);
6236 assert(BI->getSuccessor(1) == Exit);
6237 BI->setSuccessor(1, OuterCond);
6251 omp::Directive::OMPD_for,
false,
6254 return BarrierIP.takeError();
6273 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
6278 if (BBsToErase.
count(UseInst->getParent()))
6285 while (BBsToErase.
remove_if(HasRemainingUses)) {
6296 assert(
Loops.size() >= 1 &&
"At least one loop required");
6297 size_t NumLoops =
Loops.size();
6301 return Loops.front();
6313 Loop->collectControlBlocks(OldControlBBs);
6317 if (ComputeIP.
isSet())
6324 Value *CollapsedTripCount =
nullptr;
6327 "All loops to collapse must be valid canonical loops");
6328 Value *OrigTripCount = L->getTripCount();
6329 if (!CollapsedTripCount) {
6330 CollapsedTripCount = OrigTripCount;
6335 CollapsedTripCount =
6336 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6342 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6348 Builder.restoreIP(Result->getBodyIP());
6350 Value *Leftover = Result->getIndVar();
6352 NewIndVars.
resize(NumLoops);
6353 for (
int i = NumLoops - 1; i >= 1; --i) {
6354 Value *OrigTripCount =
Loops[i]->getTripCount();
6356 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6357 NewIndVars[i] = NewIndVar;
6359 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6362 NewIndVars[0] = Leftover;
6371 BasicBlock *ContinueBlock = Result->getBody();
6373 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6380 ContinueBlock =
nullptr;
6381 ContinuePred = NextSrc;
6388 for (
size_t i = 0; i < NumLoops - 1; ++i)
6389 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6395 for (
size_t i = NumLoops - 1; i > 0; --i)
6396 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6399 ContinueWith(Result->getLatch(),
nullptr);
6406 for (
size_t i = 0; i < NumLoops; ++i)
6407 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6421std::vector<CanonicalLoopInfo *>
6425 "Must pass as many tile sizes as there are loops");
6426 int NumLoops =
Loops.size();
6427 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6439 Loop->collectControlBlocks(OldControlBBs);
6447 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6448 OrigTripCounts.
push_back(L->getTripCount());
6459 for (
int i = 0; i < NumLoops - 1; ++i) {
6472 for (
int i = 0; i < NumLoops; ++i) {
6474 Value *OrigTripCount = OrigTripCounts[i];
6487 Value *FloorTripOverflow =
6488 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6490 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6491 Value *FloorTripCount =
6492 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6493 "omp_floor" +
Twine(i) +
".tripcount",
true);
6496 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6502 std::vector<CanonicalLoopInfo *> Result;
6503 Result.reserve(NumLoops * 2);
6516 auto EmbeddNewLoop =
6517 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6520 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6525 Enter = EmbeddedLoop->
getBody();
6527 OutroInsertBefore = EmbeddedLoop->
getLatch();
6528 return EmbeddedLoop;
6532 const Twine &NameBase) {
6535 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6536 Result.push_back(EmbeddedLoop);
6540 EmbeddNewLoops(FloorCount,
"floor");
6546 for (
int i = 0; i < NumLoops; ++i) {
6550 Value *FloorIsEpilogue =
6552 Value *TileTripCount =
6559 EmbeddNewLoops(TileCounts,
"tile");
6564 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6573 BodyEnter =
nullptr;
6574 BodyEntered = ExitBB;
6586 Builder.restoreIP(Result.back()->getBodyIP());
6587 for (
int i = 0; i < NumLoops; ++i) {
6590 Value *OrigIndVar = OrigIndVars[i];
6618 if (Properties.
empty())
6641 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6645 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6653 if (
I.mayReadOrWriteMemory()) {
6657 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6671 Loop->collectControlBlocks(oldControlBBs);
6676 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6677 origTripCounts.
push_back(L->getTripCount());
6686 Builder.SetInsertPoint(TCBlock);
6687 Value *fusedTripCount =
nullptr;
6689 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
6690 Value *origTripCount = L->getTripCount();
6691 if (!fusedTripCount) {
6692 fusedTripCount = origTripCount;
6695 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
6696 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
6710 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6711 Loops[i]->getPreheader()->moveBefore(TCBlock);
6712 Loops[i]->getAfter()->moveBefore(TCBlock);
6716 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6728 for (
size_t i = 0; i <
Loops.size(); ++i) {
6730 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
6731 Builder.SetInsertPoint(condBlock);
6739 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6740 Builder.SetInsertPoint(condBBs[i]);
6741 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
6757 "omp.fused.pre_latch");
6790 const Twine &NamePrefix) {
6819 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
6821 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
6824 Builder.SetInsertPoint(SplitBeforeIt);
6826 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
6829 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
6832 Builder.SetInsertPoint(ElseBlock);
6838 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
6840 ExistingBlocks.
append(L->block_begin(), L->block_end());
6846 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
6848 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
6855 if (
Block == ThenBlock)
6856 NewBB->
setName(NamePrefix +
".if.else");
6859 VMap[
Block] = NewBB;
6867 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
6868 NamePrefix +
".pre_latch");
6872 L->addBasicBlockToLoop(ThenBlock, LI);
6878 if (TargetTriple.
isX86()) {
6879 if (Features.
lookup(
"avx512f"))
6881 else if (Features.
lookup(
"avx"))
6885 if (TargetTriple.
isPPC())
6887 if (TargetTriple.
isWasm())
6894 Value *IfCond, OrderKind Order,
6904 if (!BB.hasTerminator())
6920 I->eraseFromParent();
6923 if (AlignedVars.
size()) {
6925 for (
auto &AlignedItem : AlignedVars) {
6926 Value *AlignedPtr = AlignedItem.first;
6927 Value *Alignment = AlignedItem.second;
6930 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
6938 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
6951 Reachable.insert(
Block);
6961 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
6977 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6979 if (Simdlen || Safelen) {
6983 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7009static std::unique_ptr<TargetMachine>
7013 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7014 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7025 std::nullopt, OptLevel));
7043 if (!BB.hasTerminator())
7056 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7057 FAM.registerPass([&]() {
return TIRA; });
7071 I->eraseFromParent();
7074 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7079 nullptr, ORE,
static_cast<int>(OptLevel),
7100 <<
" Threshold=" << UP.
Threshold <<
"\n"
7103 <<
" PartialOptSizeThreshold="
7123 Ptr = Load->getPointerOperand();
7125 Ptr = Store->getPointerOperand();
7132 if (Alloca->getParent() == &
F->getEntryBlock())
7152 int MaxTripCount = 0;
7153 bool MaxOrZero =
false;
7154 unsigned TripMultiple = 0;
7157 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7158 unsigned Factor = UP.
Count;
7159 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7170 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7186 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7199 *UnrolledCLI =
Loop;
7204 "unrolling only makes sense with a factor of 2 or larger");
7206 Type *IndVarTy =
Loop->getIndVarType();
7213 std::vector<CanonicalLoopInfo *>
LoopNest =
7228 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7231 (*UnrolledCLI)->assertOK();
7249 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7268 if (!CPVars.
empty()) {
7273 Directive OMPD = Directive::OMPD_single;
7278 Value *Args[] = {Ident, ThreadId};
7287 if (
Error Err = FiniCB(IP))
7308 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7315 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7318 ConstantInt::get(Int64, 0), CPVars[
I],
7321 }
else if (!IsNowait) {
7324 omp::Directive::OMPD_unknown,
false,
7339 Directive OMPD = Directive::OMPD_critical;
7344 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7345 Value *Args[] = {Ident, ThreadId, LockVar};
7362 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7370 const Twine &Name,
bool IsDependSource) {
7374 "OpenMP runtime requires depend vec with i64 type");
7387 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7401 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7419 Directive OMPD = Directive::OMPD_ordered;
7428 Value *Args[] = {Ident, ThreadId};
7438 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7445 bool HasFinalize,
bool IsCancellable) {
7452 BasicBlock *EntryBB = Builder.GetInsertBlock();
7461 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7472 "Unexpected control flow graph state!!");
7474 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7476 return AfterIP.takeError();
7481 "Unexpected Insertion point location!");
7484 auto InsertBB = merged ? ExitPredBB : ExitBB;
7487 Builder.SetInsertPoint(InsertBB);
7489 return Builder.saveIP();
7493 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7495 if (!Conditional || !EntryCall)
7501 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7511 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7515 UI->eraseFromParent();
7523 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7531 "Unexpected finalization stack state!");
7534 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7536 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7537 return std::move(Err);
7541 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7551 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7585 "copyin.not.master.end");
7592 Builder.SetInsertPoint(OMP_Entry);
7593 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7594 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7595 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7596 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7598 Builder.SetInsertPoint(CopyBegin);
7615 Value *Args[] = {ThreadId,
Size, Allocator};
7632 Value *Args[] = {ThreadId, Addr, Allocator};
7640 Value *DependenceAddress,
bool HaveNowaitClause) {
7648 if (Device ==
nullptr)
7650 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
7651 if (NumDependences ==
nullptr) {
7652 NumDependences = ConstantInt::get(Int32, 0);
7656 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7658 Ident, ThreadId, InteropVar, InteropTypeVal,
7659 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
7668 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
7676 if (Device ==
nullptr)
7678 if (NumDependences ==
nullptr) {
7679 NumDependences = ConstantInt::get(Int32, 0);
7683 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7685 Ident, ThreadId, InteropVar, Device,
7686 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7695 Value *NumDependences,
7696 Value *DependenceAddress,
7697 bool HaveNowaitClause) {
7704 if (Device ==
nullptr)
7706 if (NumDependences ==
nullptr) {
7707 NumDependences = ConstantInt::get(Int32, 0);
7711 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7713 Ident, ThreadId, InteropVar, Device,
7714 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7744 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
7745 "expected num_threads and num_teams to be specified");
7764 const std::string DebugPrefix =
"_debug__";
7765 if (KernelName.
ends_with(DebugPrefix)) {
7766 KernelName = KernelName.
drop_back(DebugPrefix.length());
7767 Kernel =
M.getFunction(KernelName);
7773 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
7778 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
7779 if (MaxThreadsVal < 0) {
7785 MaxThreadsVal = Attrs.MinThreads;
7789 if (MaxThreadsVal > 0)
7802 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
7805 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
7806 Constant *DynamicEnvironmentInitializer =
7810 DynamicEnvironmentInitializer, DynamicEnvironmentName,
7812 DL.getDefaultGlobalsAddressSpace());
7816 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
7817 ? DynamicEnvironmentGV
7819 DynamicEnvironmentPtr);
7822 ConfigurationEnvironment, {
7823 UseGenericStateMachineVal,
7824 MayUseNestedParallelismVal,
7831 ReductionBufferLength,
7834 KernelEnvironment, {
7835 ConfigurationEnvironmentInitializer,
7839 std::string KernelEnvironmentName =
7840 (KernelName +
"_kernel_environment").str();
7843 KernelEnvironmentInitializer, KernelEnvironmentName,
7845 DL.getDefaultGlobalsAddressSpace());
7849 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
7850 ? KernelEnvironmentGV
7852 KernelEnvironmentPtr);
7853 Value *KernelLaunchEnvironment =
7856 KernelLaunchEnvironment =
7857 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
7858 ? KernelLaunchEnvironment
7859 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
7860 KernelLaunchEnvParamTy);
7862 Fn, {KernelEnvironment, KernelLaunchEnvironment});
7874 auto *UI =
Builder.CreateUnreachable();
7880 Builder.SetInsertPoint(WorkerExitBB);
7884 Builder.SetInsertPoint(CheckBBTI);
7885 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
7887 CheckBBTI->eraseFromParent();
7888 UI->eraseFromParent();
7896 int32_t TeamsReductionDataSize,
7897 int32_t TeamsReductionBufferLength) {
7902 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7906 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
7912 const std::string DebugPrefix =
"_debug__";
7914 KernelName = KernelName.
drop_back(DebugPrefix.length());
7915 auto *KernelEnvironmentGV =
7916 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
7917 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
7918 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
7920 KernelEnvironmentInitializer,
7921 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
7923 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
7925 KernelEnvironmentGV->setInitializer(NewInitializer);
7930 if (
Kernel.hasFnAttribute(Name)) {
7931 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
7937std::pair<int32_t, int32_t>
7939 int32_t ThreadLimit =
7940 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
7943 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
7944 if (!Attr.isValid() || !Attr.isStringAttribute())
7945 return {0, ThreadLimit};
7946 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
7949 return {0, ThreadLimit};
7950 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
7958 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
7960 return {0, ThreadLimit};
7966 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
7969 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
7977std::pair<int32_t, int32_t>
7980 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7984 int32_t LB, int32_t UB) {
7991 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7994void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
8003 else if (
T.isNVPTX())
8005 else if (
T.isSPIRV())
8010Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8011 StringRef EntryFnIDName) {
8012 if (
Config.isTargetDevice()) {
8013 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8017 return new GlobalVariable(
8022Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8023 StringRef EntryFnName) {
8027 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8028 "Named kernel already exists?");
8029 return new GlobalVariable(
8042 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8046 OutlinedFn = *CBResult;
8048 OutlinedFn =
nullptr;
8054 if (!IsOffloadEntry)
8057 std::string EntryFnIDName =
8059 ? std::string(EntryFnName)
8063 EntryFnName, EntryFnIDName);
8071 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8072 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8073 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8075 EntryInfo, EntryAddr, OutlinedFnID,
8077 return OutlinedFnID;
8094 bool IsStandAlone = !BodyGenCB;
8101 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8103 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8104 true, DeviceAddrCB))
8111 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8121 SrcLocInfo, DeviceID,
8128 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8132 if (Info.HasNoWait) {
8142 if (Info.HasNoWait) {
8146 emitBlock(OffloadContBlock, CurFn,
true);
8152 bool RequiresOuterTargetTask = Info.HasNoWait;
8153 if (!RequiresOuterTargetTask)
8154 cantFail(TaskBodyCB(
nullptr,
nullptr,
8158 {}, RTArgs, Info.HasNoWait));
8161 omp::OMPRTL___tgt_target_data_begin_mapper);
8165 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8169 Builder.CreateStore(LI, DeviceMap.second.second);
8205 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8214 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8236 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8237 return BeginThenGen(AllocaIP,
Builder.saveIP());
8252 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8253 return EndThenGen(AllocaIP,
Builder.saveIP());
8256 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8257 return BeginThenGen(AllocaIP,
Builder.saveIP());
8268 bool IsGPUDistribute) {
8269 assert((IVSize == 32 || IVSize == 64) &&
8270 "IV size is not compatible with the omp runtime");
8272 if (IsGPUDistribute)
8274 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8275 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8276 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8277 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8279 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8280 : omp::OMPRTL___kmpc_for_static_init_4u)
8281 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8282 : omp::OMPRTL___kmpc_for_static_init_8u);
8289 assert((IVSize == 32 || IVSize == 64) &&
8290 "IV size is not compatible with the omp runtime");
8292 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8293 : omp::OMPRTL___kmpc_dispatch_init_4u)
8294 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8295 : omp::OMPRTL___kmpc_dispatch_init_8u);
8302 assert((IVSize == 32 || IVSize == 64) &&
8303 "IV size is not compatible with the omp runtime");
8305 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8306 : omp::OMPRTL___kmpc_dispatch_next_4u)
8307 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8308 : omp::OMPRTL___kmpc_dispatch_next_8u);
8315 assert((IVSize == 32 || IVSize == 64) &&
8316 "IV size is not compatible with the omp runtime");
8318 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8319 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8320 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8321 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8332 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8340 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8344 if (NewVar && (arg == NewVar->
getArg()))
8354 auto UpdateDebugRecord = [&](
auto *DR) {
8357 for (
auto Loc : DR->location_ops()) {
8358 auto Iter = ValueReplacementMap.find(
Loc);
8359 if (Iter != ValueReplacementMap.end()) {
8360 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8361 ArgNo = std::get<1>(Iter->second) + 1;
8365 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8372 "Unexpected debug intrinsic");
8374 UpdateDebugRecord(&DVR);
8379 Module *M = Func->getParent();
8382 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8383 unsigned ArgNo = Func->arg_size();
8385 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8386 false, DINode::DIFlags::FlagArtificial);
8388 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8389 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8410 for (
auto &Arg : Inputs)
8411 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8415 for (
auto &Arg : Inputs)
8416 ParameterTypes.
push_back(Arg->getType());
8424 auto BB = Builder.GetInsertBlock();
8425 auto M = BB->getModule();
8436 if (TargetCpuAttr.isStringAttribute())
8437 Func->addFnAttr(TargetCpuAttr);
8439 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8440 if (TargetFeaturesAttr.isStringAttribute())
8441 Func->addFnAttr(TargetFeaturesAttr);
8446 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8457 Builder.SetInsertPoint(EntryBB);
8463 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8473 splitBB(Builder,
true,
"outlined.body");
8479 Builder.restoreIP(*AfterIP);
8484 Builder.CreateRetVoid();
8488 auto AllocaIP = Builder.saveIP();
8493 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
8525 if (Instr->getFunction() == Func)
8526 Instr->replaceUsesOfWith(
Input, InputCopy);
8532 for (
auto InArg :
zip(Inputs, ArgRange)) {
8534 Argument &Arg = std::get<1>(InArg);
8535 Value *InputCopy =
nullptr;
8538 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
8541 Builder.restoreIP(*AfterIP);
8542 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8562 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
8569 ReplaceValue(
Input, InputCopy, Func);
8573 for (
auto Deferred : DeferredReplacement)
8574 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
8577 ValueReplacementMap);
8585 Value *TaskWithPrivates,
8586 Type *TaskWithPrivatesTy) {
8588 Type *TaskTy = OMPIRBuilder.Task;
8591 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
8592 Value *Shareds = TaskT;
8602 if (TaskWithPrivatesTy != TaskTy)
8603 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
8620 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
8625 assert((!NumOffloadingArrays || PrivatesTy) &&
8626 "PrivatesTy cannot be nullptr when there are offloadingArrays"
8659 Type *TaskPtrTy = OMPBuilder.TaskPtr;
8660 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
8666 ".omp_target_task_proxy_func",
8667 Builder.GetInsertBlock()->getModule());
8668 Value *ThreadId = ProxyFn->getArg(0);
8669 Value *TaskWithPrivates = ProxyFn->getArg(1);
8670 ThreadId->
setName(
"thread.id");
8671 TaskWithPrivates->
setName(
"task");
8673 bool HasShareds = SharedArgsOperandNo > 0;
8674 bool HasOffloadingArrays = NumOffloadingArrays > 0;
8677 Builder.SetInsertPoint(EntryBB);
8683 if (HasOffloadingArrays) {
8684 assert(TaskTy != TaskWithPrivatesTy &&
8685 "If there are offloading arrays to pass to the target"
8686 "TaskTy cannot be the same as TaskWithPrivatesTy");
8689 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
8690 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
8692 Builder.CreateStructGEP(PrivatesTy, Privates, i));
8696 auto *ArgStructAlloca =
8698 assert(ArgStructAlloca &&
8699 "Unable to find the alloca instruction corresponding to arguments "
8700 "for extracted function");
8702 std::optional<TypeSize> ArgAllocSize =
8704 assert(ArgStructType && ArgAllocSize &&
8705 "Unable to determine size of arguments for extracted function");
8706 uint64_t StructSize = ArgAllocSize->getFixedValue();
8709 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
8711 Value *SharedsSize = Builder.getInt64(StructSize);
8714 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
8716 Builder.CreateMemCpy(
8717 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
8719 KernelLaunchArgs.
push_back(NewArgStructAlloca);
8722 Builder.CreateRetVoid();
8728 return GEP->getSourceElementType();
8730 return Alloca->getAllocatedType();
8753 if (OffloadingArraysToPrivatize.
empty())
8754 return OMPIRBuilder.Task;
8757 for (
Value *V : OffloadingArraysToPrivatize) {
8758 assert(V->getType()->isPointerTy() &&
8759 "Expected pointer to array to privatize. Got a non-pointer value "
8762 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8768 "struct.task_with_privates");
8782 EntryFnName, Inputs, CBFunc,
8787 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
8924 TargetTaskAllocaBB->
begin());
8928 OI.
EntryBB = TargetTaskAllocaBB;
8934 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8937 Builder.restoreIP(TargetTaskBodyIP);
8938 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
8956 bool NeedsTargetTask = HasNoWait && DeviceID;
8957 if (NeedsTargetTask) {
8963 OffloadingArraysToPrivatize.
push_back(V);
8968 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8969 DeviceID, OffloadingArraysToPrivatize](
8972 "there must be a single user for the outlined function");
8986 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8987 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8989 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8990 "Wrong number of arguments for StaleCI when shareds are present");
8991 int SharedArgOperandNo =
8992 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8998 if (!OffloadingArraysToPrivatize.
empty())
9003 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
9004 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
9006 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9009 Builder.SetInsertPoint(StaleCI);
9026 OMPRTL___kmpc_omp_target_task_alloc);
9038 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9045 auto *ArgStructAlloca =
9047 assert(ArgStructAlloca &&
9048 "Unable to find the alloca instruction corresponding to arguments "
9049 "for extracted function");
9050 std::optional<TypeSize> ArgAllocSize =
9053 "Unable to determine size of arguments for extracted function");
9054 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9073 TaskSize, SharedsSize,
9076 if (NeedsTargetTask) {
9077 assert(DeviceID &&
"Expected non-empty device ID.");
9087 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9088 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9091 if (!OffloadingArraysToPrivatize.
empty()) {
9093 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9094 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9095 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9102 "ElementType should match ArrayType");
9105 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9107 Dst, Alignment, PtrToPrivatize, Alignment,
9108 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9122 if (!NeedsTargetTask) {
9131 ConstantInt::get(
Builder.getInt32Ty(), 0),
9144 }
else if (DepArray) {
9152 {Ident, ThreadID, TaskData,
Builder.getInt32(Dependencies.
size()),
9153 DepArray, ConstantInt::get(
Builder.getInt32Ty(), 0),
9163 I->eraseFromParent();
9168 << *(
Builder.GetInsertBlock()) <<
"\n");
9170 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9182 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9199 bool HasNoWait,
Value *DynCGroupMem,
9206 Builder.restoreIP(IP);
9212 return Builder.saveIP();
9215 bool HasDependencies = Dependencies.
size() > 0;
9216 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9233 if (OutlinedFnID && DeviceID)
9235 EmitTargetCallFallbackCB, KArgs,
9236 DeviceID, RTLoc, TargetTaskAllocaIP);
9244 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9251 auto &&EmitTargetCallElse =
9257 if (RequiresOuterTargetTask) {
9264 Dependencies, EmptyRTArgs, HasNoWait);
9266 return EmitTargetCallFallbackCB(Builder.saveIP());
9269 Builder.restoreIP(AfterIP);
9273 auto &&EmitTargetCallThen =
9276 Info.HasNoWait = HasNoWait;
9281 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9287 for (
auto [DefaultVal, RuntimeVal] :
9289 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9290 : Builder.getInt32(DefaultVal));
9294 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9296 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9300 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9303 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9311 Value *MaxThreadsClause =
9313 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9316 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9318 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9319 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9321 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9322 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9324 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9327 unsigned NumTargetItems = Info.NumberOfPtrs;
9335 Builder.getInt64Ty(),
9337 : Builder.getInt64(0);
9341 DynCGroupMem = Builder.getInt32(0);
9344 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9345 HasNoWait, DynCGroupMemFallback);
9352 if (RequiresOuterTargetTask)
9354 RTLoc, AllocaIP, Dependencies,
9355 KArgs.
RTArgs, Info.HasNoWait);
9358 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9359 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9362 Builder.restoreIP(AfterIP);
9369 if (!OutlinedFnID) {
9370 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
9376 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
9381 EmitTargetCallElse, AllocaIP));
9408 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9409 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9415 if (!
Config.isTargetDevice())
9417 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
9418 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
9419 DynCGroupMemFallback);
9433 return OS.
str().str();
9438 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9444 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9446 assert(Elem.second->getValueType() == Ty &&
9447 "OMP internal variable has different type than requested");
9460 :
M.getTargetTriple().isAMDGPU()
9462 :
DL.getDefaultGlobalsAddressSpace();
9471 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9472 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9479Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9480 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9481 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9492 return SizePtrToInt;
9497 std::string VarName) {
9505 return MaptypesArrayGlobal;
9510 unsigned NumOperands,
9519 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9523 ArrI64Ty,
nullptr,
".offload_sizes");
9534 int64_t DeviceID,
unsigned NumOperands) {
9540 Value *ArgsBaseGEP =
9542 {Builder.getInt32(0), Builder.getInt32(0)});
9545 {Builder.getInt32(0), Builder.getInt32(0)});
9546 Value *ArgSizesGEP =
9548 {Builder.getInt32(0), Builder.getInt32(0)});
9552 Builder.getInt32(NumOperands),
9553 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9554 MaptypesArg, MapnamesArg, NullPtr});
9561 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
9562 "expected region end call to runtime only when end call is separate");
9564 auto VoidPtrTy = UnqualPtrTy;
9565 auto VoidPtrPtrTy = UnqualPtrTy;
9567 auto Int64PtrTy = UnqualPtrTy;
9569 if (!Info.NumberOfPtrs) {
9581 Info.RTArgs.BasePointersArray,
9584 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
9588 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
9592 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
9593 : Info.RTArgs.MapTypesArray,
9599 if (!Info.EmitDebug)
9603 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
9608 if (!Info.HasMapper)
9612 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
9633 "struct.descriptor_dim");
9635 enum { OffsetFD = 0, CountFD, StrideFD };
9639 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
9642 if (NonContigInfo.
Dims[
I] == 1)
9647 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9649 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
9650 unsigned RevIdx = EE -
II - 1;
9654 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9656 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
9657 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9659 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9661 NonContigInfo.
Counts[L][RevIdx], CountLVal,
9662 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9664 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9666 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
9667 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9671 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
9672 DimsAddr,
Builder.getPtrTy());
9675 Info.RTArgs.PointersArray, 0,
I);
9677 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
9682void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
9686 StringRef Prefix = IsInit ?
".init" :
".del";
9692 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
9693 Value *DeleteBit = Builder.CreateAnd(
9696 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9697 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9702 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
9703 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
9704 DeleteCond = Builder.CreateIsNull(
9709 DeleteCond =
Builder.CreateIsNotNull(
9725 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9726 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9727 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9728 MapTypeArg =
Builder.CreateOr(
9731 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9732 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9736 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
9737 ArraySize, MapTypeArg, MapName};
9763 MapperFn->
addFnAttr(Attribute::NoInline);
9764 MapperFn->
addFnAttr(Attribute::NoUnwind);
9774 auto SavedIP =
Builder.saveIP();
9775 Builder.SetInsertPoint(EntryBB);
9787 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
9789 Value *PtrBegin = BeginIn;
9795 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9796 MapType, MapName, ElementSize, HeadBB,
9807 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9808 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9814 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9815 PtrPHI->addIncoming(PtrBegin, HeadBB);
9820 return Info.takeError();
9824 Value *OffloadingArgs[] = {MapperHandle};
9828 Value *ShiftedPreviousSize =
9832 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
9833 Value *CurBaseArg = Info->BasePointers[
I];
9834 Value *CurBeginArg = Info->Pointers[
I];
9835 Value *CurSizeArg = Info->Sizes[
I];
9836 Value *CurNameArg = Info->Names.size()
9842 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9844 Value *MemberMapType =
9845 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9862 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9863 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9864 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9874 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9880 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9881 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9882 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9888 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9889 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9890 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9896 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9897 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9903 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9904 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9905 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9911 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9912 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9923 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9924 CurSizeArg, CurMapType, CurNameArg};
9926 auto ChildMapperFn = CustomMapperCB(
I);
9928 return ChildMapperFn.takeError();
9929 if (*ChildMapperFn) {
9944 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9945 "omp.arraymap.next");
9946 PtrPHI->addIncoming(PtrNext, LastBB);
9947 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
9949 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9954 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9955 MapType, MapName, ElementSize, DoneBB,
9969 bool IsNonContiguous,
9973 Info.clearArrayInfo();
9976 if (Info.NumberOfPtrs == 0)
9985 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
9986 PointerArrayType,
nullptr,
".offload_baseptrs");
9988 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
9989 PointerArrayType,
nullptr,
".offload_ptrs");
9991 PointerArrayType,
nullptr,
".offload_mappers");
9992 Info.RTArgs.MappersArray = MappersArray;
9999 ConstantInt::get(Int64Ty, 0));
10001 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
10002 bool IsNonContigEntry =
10004 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10006 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10009 if (IsNonContigEntry) {
10011 "Index must be in-bounds for NON_CONTIG Dims array");
10013 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10014 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10019 ConstSizes[
I] = CI;
10023 RuntimeSizes.
set(
I);
10026 if (RuntimeSizes.
all()) {
10028 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10029 SizeArrayType,
nullptr,
".offload_sizes");
10035 auto *SizesArrayGbl =
10040 if (!RuntimeSizes.
any()) {
10041 Info.RTArgs.SizesArray = SizesArrayGbl;
10043 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10044 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10047 SizeArrayType,
nullptr,
".offload_sizes");
10051 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10052 SizesArrayGbl, OffloadSizeAlign,
10057 Info.RTArgs.SizesArray = Buffer;
10065 for (
auto mapFlag : CombinedInfo.
Types)
10067 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10071 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10077 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10078 Info.EmitDebug =
true;
10080 Info.RTArgs.MapNamesArray =
10082 Info.EmitDebug =
false;
10087 if (Info.separateBeginEndCalls()) {
10088 bool EndMapTypesDiffer =
false;
10090 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10091 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10092 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10093 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10094 EndMapTypesDiffer =
true;
10097 if (EndMapTypesDiffer) {
10099 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10104 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10107 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10109 Builder.CreateAlignedStore(BPVal, BP,
10110 M.getDataLayout().getPrefTypeAlign(PtrTy));
10112 if (Info.requiresDevicePointerInfo()) {
10114 CodeGenIP =
Builder.saveIP();
10116 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10117 Builder.restoreIP(CodeGenIP);
10119 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10121 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10123 DeviceAddrCB(
I, BP);
10129 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10132 Builder.CreateAlignedStore(PVal,
P,
10133 M.getDataLayout().getPrefTypeAlign(PtrTy));
10135 if (RuntimeSizes.
test(
I)) {
10137 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10143 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10146 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10149 auto CustomMFunc = CustomMapperCB(
I);
10151 return CustomMFunc.takeError();
10153 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10156 PointerArrayType, MappersArray,
10159 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10163 Info.NumberOfPtrs == 0)
10180 Builder.ClearInsertionPoint();
10210 auto CondConstant = CI->getSExtValue();
10212 return ThenGen(AllocaIP,
Builder.saveIP());
10214 return ElseGen(AllocaIP,
Builder.saveIP());
10224 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10242bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10246 "Unexpected Atomic Ordering.");
10248 bool Flush =
false;
10310 assert(
X.Var->getType()->isPointerTy() &&
10311 "OMP Atomic expects a pointer to target memory");
10312 Type *XElemTy =
X.ElemTy;
10315 "OMP atomic read expected a scalar type");
10317 Value *XRead =
nullptr;
10321 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10330 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10333 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10335 XRead = AtomicLoadRes.first;
10342 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10345 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10347 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10350 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10351 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10362 assert(
X.Var->getType()->isPointerTy() &&
10363 "OMP Atomic expects a pointer to target memory");
10364 Type *XElemTy =
X.ElemTy;
10367 "OMP atomic write expected a scalar type");
10375 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10378 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10386 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10391 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10398 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10399 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10405 Type *XTy =
X.Var->getType();
10407 "OMP Atomic expects a pointer to target memory");
10408 Type *XElemTy =
X.ElemTy;
10411 "OMP atomic update expected a scalar type");
10414 "OpenMP atomic does not support LT or GT operations");
10418 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10419 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10421 return AtomicResult.takeError();
10422 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10427Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10431 return Builder.CreateAdd(Src1, Src2);
10433 return Builder.CreateSub(Src1, Src2);
10435 return Builder.CreateAnd(Src1, Src2);
10437 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10439 return Builder.CreateOr(Src1, Src2);
10441 return Builder.CreateXor(Src1, Src2);
10465Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10468 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10469 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10472 bool emitRMWOp =
false;
10480 emitRMWOp = XElemTy;
10483 emitRMWOp = (IsXBinopExpr && XElemTy);
10490 std::pair<Value *, Value *> Res;
10492 AtomicRMWInst *RMWInst =
10493 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10494 if (
T.isAMDGPU()) {
10495 if (IsIgnoreDenormalMode)
10496 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10498 if (!IsFineGrainedMemory)
10499 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10501 if (!IsRemoteMemory)
10505 Res.first = RMWInst;
10510 Res.second = Res.first;
10512 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10516 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10519 unsigned LoadSize =
10522 OpenMPIRBuilder::AtomicInfo atomicInfo(
10524 OldVal->
getAlign(),
true , AllocaIP,
X);
10525 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10528 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10535 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10536 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10537 Builder.SetInsertPoint(ContBB);
10539 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10541 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10544 Value *Upd = *CBResult;
10545 Builder.CreateStore(Upd, NewAtomicAddr);
10548 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10549 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10550 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
10551 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
10554 Res.first = OldExprVal;
10557 if (UnreachableInst *ExitTI =
10560 Builder.SetInsertPoint(ExitBB);
10562 Builder.SetInsertPoint(ExitTI);
10565 IntegerType *IntCastTy =
10568 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
10577 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10584 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10585 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10586 Builder.SetInsertPoint(ContBB);
10588 PHI->addIncoming(OldVal, CurBB);
10593 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
10594 X->getName() +
".atomic.fltCast");
10596 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
10597 X->getName() +
".atomic.ptrCast");
10601 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10604 Value *Upd = *CBResult;
10605 Builder.CreateStore(Upd, NewAtomicAddr);
10606 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
10610 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
10611 Result->setVolatile(VolatileX);
10612 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
10613 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10614 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
10615 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
10617 Res.first = OldExprVal;
10621 if (UnreachableInst *ExitTI =
10624 Builder.SetInsertPoint(ExitBB);
10626 Builder.SetInsertPoint(ExitTI);
10637 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
10638 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10643 Type *XTy =
X.Var->getType();
10645 "OMP Atomic expects a pointer to target memory");
10646 Type *XElemTy =
X.ElemTy;
10649 "OMP atomic capture expected a scalar type");
10651 "OpenMP atomic does not support LT or GT operations");
10658 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
10659 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10662 Value *CapturedVal =
10663 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
10664 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
10666 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
10678 IsPostfixUpdate, IsFailOnly, Failure);
10690 assert(
X.Var->getType()->isPointerTy() &&
10691 "OMP atomic expects a pointer to target memory");
10694 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
10695 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
10698 bool IsInteger = E->getType()->isIntegerTy();
10700 if (
Op == OMPAtomicCompareOp::EQ) {
10715 Value *OldValue =
Builder.CreateExtractValue(Result, 0);
10717 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
10719 "OldValue and V must be of same type");
10720 if (IsPostfixUpdate) {
10721 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
10723 Value *SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
10736 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10738 CurBBTI,
X.Var->getName() +
".atomic.exit");
10744 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
10746 Builder.SetInsertPoint(ContBB);
10747 Builder.CreateStore(OldValue, V.Var);
10753 Builder.SetInsertPoint(ExitBB);
10755 Builder.SetInsertPoint(ExitTI);
10758 Value *CapturedValue =
10759 Builder.CreateSelect(SuccessOrFail, E, OldValue);
10760 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10766 assert(R.Var->getType()->isPointerTy() &&
10767 "r.var must be of pointer type");
10768 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10770 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10771 Value *ResultCast = R.IsSigned
10772 ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
10773 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
10774 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
10777 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10778 "Op should be either max or min at this point");
10779 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
10790 if (IsXBinopExpr) {
10819 Value *CapturedValue =
nullptr;
10820 if (IsPostfixUpdate) {
10821 CapturedValue = OldValue;
10846 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
10847 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
10849 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10853 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10873 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
10900 bool SubClausesPresent =
10901 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10903 if (!
Config.isTargetDevice() && SubClausesPresent) {
10904 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10905 "if lowerbound is non-null, then upperbound must also be non-null "
10906 "for bounds on num_teams");
10908 if (NumTeamsUpper ==
nullptr)
10909 NumTeamsUpper =
Builder.getInt32(0);
10911 if (NumTeamsLower ==
nullptr)
10912 NumTeamsLower = NumTeamsUpper;
10916 "argument to if clause must be an integer value");
10920 IfExpr =
Builder.CreateICmpNE(IfExpr,
10921 ConstantInt::get(IfExpr->
getType(), 0));
10922 NumTeamsUpper =
Builder.CreateSelect(
10923 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
10926 NumTeamsLower =
Builder.CreateSelect(
10927 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
10930 if (ThreadLimit ==
nullptr)
10931 ThreadLimit =
Builder.getInt32(0);
10935 Value *NumTeamsLowerInt32 =
10937 Value *NumTeamsUpperInt32 =
10939 Value *ThreadLimitInt32 =
10946 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
10947 ThreadLimitInt32});
10952 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10964 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
10966 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
10968 auto HostPostOutlineCB = [
this, Ident,
10969 ToBeDeleted](
Function &OutlinedFn)
mutable {
10974 "there must be a single user for the outlined function");
10979 "Outlined function must have two or three arguments only");
10981 bool HasShared = OutlinedFn.
arg_size() == 3;
10989 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
10990 "outlined function.");
10991 Builder.SetInsertPoint(StaleCI);
10998 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
11002 I->eraseFromParent();
11005 if (!
Config.isTargetDevice())
11024 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11039 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
11044 if (
Config.isTargetDevice()) {
11059 std::string VarName) {
11068 return MapNamesArrayGlobal;
11073void OpenMPIRBuilder::initializeTypes(
Module &M) {
11077 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11078#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11079#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11080 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11081 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11082#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11083 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11084 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11085#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11086 T = StructType::getTypeByName(Ctx, StructName); \
11088 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11090 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11091#include "llvm/Frontend/OpenMP/OMPKinds.def"
11102 while (!Worklist.
empty()) {
11106 if (
BlockSet.insert(SuccBB).second)
11118 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11130 Fn->
addFnAttr(
"uniform-work-group-size");
11131 Fn->
addFnAttr(Attribute::MustProgress);
11149 auto &&GetMDInt = [
this](
unsigned V) {
11156 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11157 auto &&TargetRegionMetadataEmitter =
11158 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11173 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11174 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11175 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11176 GetMDInt(E.getOrder())};
11179 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11188 auto &&DeviceGlobalVarMetadataEmitter =
11189 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11199 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11200 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11204 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11211 DeviceGlobalVarMetadataEmitter);
11213 for (
const auto &E : OrderedEntries) {
11214 assert(E.first &&
"All ordered entries must exist!");
11215 if (
const auto *CE =
11218 if (!CE->getID() || !CE->getAddress()) {
11222 if (!
M.getNamedValue(FnName))
11230 }
else if (
const auto *CE =
dyn_cast<
11239 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11241 if (!CE->getAddress()) {
11246 if (CE->getVarSize() == 0)
11250 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11251 (!
Config.isTargetDevice() && CE->getAddress())) &&
11252 "Declaret target link address is set.");
11253 if (
Config.isTargetDevice())
11255 if (!CE->getAddress()) {
11262 if (!CE->getAddress()) {
11275 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11279 OMPTargetGlobalVarEntryIndirectVTable))
11288 Flags, CE->getLinkage(), CE->getVarName());
11291 Flags, CE->getLinkage());
11302 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11308 Config.getRequiresFlags());
11318 OS <<
"_" <<
Count;
11323 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11326 EntryInfo.
Line, NewCount);
11334 auto FileIDInfo = CallBack();
11338 FileID =
Status->getUniqueID().getFile();
11342 FileID =
hash_value(std::get<0>(FileIDInfo));
11346 std::get<1>(FileIDInfo));
11352 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11354 !(Remain & 1); Remain = Remain >> 1)
11372 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11374 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11381 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11387 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11388 Flags |= MemberOfFlag;
11394 bool IsDeclaration,
bool IsExternallyVisible,
11396 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11397 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
11398 std::function<
Constant *()> GlobalInitializer,
11409 Config.hasRequiresUnifiedSharedMemory())) {
11414 if (!IsExternallyVisible)
11416 OS <<
"_decl_tgt_ref_ptr";
11419 Value *Ptr =
M.getNamedValue(PtrName);
11428 if (!
Config.isTargetDevice()) {
11429 if (GlobalInitializer)
11430 GV->setInitializer(GlobalInitializer());
11436 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11437 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11438 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
11450 bool IsDeclaration,
bool IsExternallyVisible,
11452 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11453 std::vector<Triple> TargetTriple,
11454 std::function<
Constant *()> GlobalInitializer,
11458 (TargetTriple.empty() && !
Config.isTargetDevice()))
11469 !
Config.hasRequiresUnifiedSharedMemory()) {
11471 VarName = MangledName;
11474 if (!IsDeclaration)
11476 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
11479 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
11483 if (
Config.isTargetDevice() &&
11492 if (!
M.getNamedValue(RefName)) {
11496 GvAddrRef->setConstant(
true);
11498 GvAddrRef->setInitializer(Addr);
11499 GeneratedRefs.push_back(GvAddrRef);
11508 if (
Config.isTargetDevice()) {
11509 VarName = (Addr) ? Addr->
getName() :
"";
11513 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11514 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11515 LlvmPtrTy, GlobalInitializer, VariableLinkage);
11516 VarName = (Addr) ? Addr->
getName() :
"";
11518 VarSize =
M.getDataLayout().getPointerSize();
11537 auto &&GetMDInt = [MN](
unsigned Idx) {
11542 auto &&GetMDString = [MN](
unsigned Idx) {
11544 return V->getString();
11547 switch (GetMDInt(0)) {
11551 case OffloadEntriesInfoManager::OffloadEntryInfo::
11552 OffloadingEntryInfoTargetRegion: {
11562 case OffloadEntriesInfoManager::OffloadEntryInfo::
11563 OffloadingEntryInfoDeviceGlobalVar:
11576 if (HostFilePath.
empty())
11580 if (std::error_code Err = Buf.getError()) {
11582 "OpenMPIRBuilder: " +
11590 if (std::error_code Err =
M.getError()) {
11592 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
11606 "expected a valid insertion block for creating an iterator loop");
11616 Builder.getCurrentDebugLocation(),
"omp.it.cont");
11628 T->eraseFromParent();
11637 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
11639 "iterator bodygen must terminate the canonical body with an "
11640 "unconditional branch to the loop latch",
11664 for (
const auto &
ParamAttr : ParamAttrs) {
11707 return std::string(Out.
str());
11715 unsigned VecRegSize;
11717 ISADataTy ISAData[] = {
11736 for (
char Mask :
Masked) {
11737 for (
const ISADataTy &
Data : ISAData) {
11740 Out <<
"_ZGV" <<
Data.ISA << Mask;
11742 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
11756template <
typename T>
11759 StringRef MangledName,
bool OutputBecomesInput,
11763 Out << Prefix << ISA << LMask << VLEN;
11764 if (OutputBecomesInput)
11766 Out << ParSeq <<
'_' << MangledName;
11775 bool OutputBecomesInput,
11780 OutputBecomesInput, Fn);
11782 OutputBecomesInput, Fn);
11786 OutputBecomesInput, Fn);
11788 OutputBecomesInput, Fn);
11792 OutputBecomesInput, Fn);
11794 OutputBecomesInput, Fn);
11799 OutputBecomesInput, Fn);
11810 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
11811 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
11823 OutputBecomesInput, Fn);
11830 OutputBecomesInput, Fn);
11832 OutputBecomesInput, Fn);
11836 OutputBecomesInput, Fn);
11840 OutputBecomesInput, Fn);
11849 OutputBecomesInput, Fn);
11856 MangledName, OutputBecomesInput, Fn);
11858 MangledName, OutputBecomesInput, Fn);
11862 MangledName, OutputBecomesInput, Fn);
11866 MangledName, OutputBecomesInput, Fn);
11876 return OffloadEntriesTargetRegion.empty() &&
11877 OffloadEntriesDeviceGlobalVar.empty();
11880unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
11882 auto It = OffloadEntriesTargetRegionCount.find(
11883 getTargetRegionEntryCountKey(EntryInfo));
11884 if (It == OffloadEntriesTargetRegionCount.end())
11889void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
11891 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
11892 EntryInfo.
Count + 1;
11898 OffloadEntriesTargetRegion[EntryInfo] =
11901 ++OffloadingEntriesNum;
11907 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
11910 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
11914 if (OMPBuilder->Config.isTargetDevice()) {
11919 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
11920 Entry.setAddress(Addr);
11922 Entry.setFlags(Flags);
11928 "Target region entry already registered!");
11930 OffloadEntriesTargetRegion[EntryInfo] = Entry;
11931 ++OffloadingEntriesNum;
11933 incrementTargetRegionEntryInfoCount(EntryInfo);
11940 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
11942 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
11943 if (It == OffloadEntriesTargetRegion.end()) {
11947 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
11955 for (
const auto &It : OffloadEntriesTargetRegion) {
11956 Action(It.first, It.second);
11962 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
11963 ++OffloadingEntriesNum;
11969 if (OMPBuilder->Config.isTargetDevice()) {
11973 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
11975 if (Entry.getVarSize() == 0) {
11976 Entry.setVarSize(VarSize);
11977 Entry.setLinkage(Linkage);
11981 Entry.setVarSize(VarSize);
11982 Entry.setLinkage(Linkage);
11983 Entry.setAddress(Addr);
11986 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
11987 assert(Entry.isValid() && Entry.getFlags() == Flags &&
11988 "Entry not initialized!");
11989 if (Entry.getVarSize() == 0) {
11990 Entry.setVarSize(VarSize);
11991 Entry.setLinkage(Linkage);
11998 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
11999 Addr, VarSize, Flags, Linkage,
12002 OffloadEntriesDeviceGlobalVar.try_emplace(
12003 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
12004 ++OffloadingEntriesNum;
12011 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12012 Action(E.getKey(), E.getValue());
12019void CanonicalLoopInfo::collectControlBlocks(
12026 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12038void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12050void CanonicalLoopInfo::mapIndVar(
12060 for (
Use &U : OldIV->
uses()) {
12064 if (
User->getParent() == getCond())
12066 if (
User->getParent() == getLatch())
12072 Value *NewIV = Updater(OldIV);
12075 for (Use *U : ReplacableUses)
12096 "Preheader must terminate with unconditional branch");
12098 "Preheader must jump to header");
12102 "Header must terminate with unconditional branch");
12103 assert(Header->getSingleSuccessor() == Cond &&
12104 "Header must jump to exiting block");
12107 assert(Cond->getSinglePredecessor() == Header &&
12108 "Exiting block only reachable from header");
12111 "Exiting block must terminate with conditional branch");
12113 "Exiting block's first successor jump to the body");
12115 "Exiting block's second successor must exit the loop");
12119 "Body only reachable from exiting block");
12124 "Latch must terminate with unconditional branch");
12125 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12128 assert(Latch->getSinglePredecessor() !=
nullptr);
12133 "Exit block must terminate with unconditional branch");
12134 assert(Exit->getSingleSuccessor() == After &&
12135 "Exit block must jump to after block");
12139 "After block only reachable from exit block");
12143 assert(IndVar &&
"Canonical induction variable not found?");
12145 "Induction variable must be an integer");
12147 "Induction variable must be a PHI in the loop header");
12153 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12161 assert(TripCount &&
"Loop trip count not found?");
12163 "Trip count and induction variable must have the same type");
12167 "Exit condition must be a signed less-than comparison");
12169 "Exit condition must compare the induction variable");
12171 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static bool hasGridValue(const Triple &T)
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
bool hasTerminator() const LLVM_READONLY
Returns whether the block has a terminator.
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetGenArgAccessorsCallbackTy
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, SmallVector< DependData > Dependencies={}, AffinityData Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetBodyGenCallbackTy
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Generator for 'omp barrier'.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const SmallVector< DependData > &Dependencies, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after the scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
A struct to pack relevant information while generating atomic Ops.
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
bool FixUpNonEntryAllocas
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...