69#define DEBUG_TYPE "openmp-ir-builder"
76 cl::desc(
"Use optimistic attributes describing "
77 "'as-if' properties of runtime calls."),
81 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
82 cl::desc(
"Factor for the unroll threshold to account for code "
83 "simplifications still taking place"),
94 if (!IP1.isSet() || !IP2.isSet())
96 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
// NOTE(review): fragment of a switch over the schedule type with the
// monotonicity bits masked off. All case labels listed below appear to share
// a single handler (the handler itself is not visible in this chunk).
101 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
102 case OMPScheduleType::UnorderedStaticChunked:
103 case OMPScheduleType::UnorderedStatic:
104 case OMPScheduleType::UnorderedDynamicChunked:
105 case OMPScheduleType::UnorderedGuidedChunked:
106 case OMPScheduleType::UnorderedRuntime:
107 case OMPScheduleType::UnorderedAuto:
108 case OMPScheduleType::UnorderedTrapezoidal:
109 case OMPScheduleType::UnorderedGreedy:
110 case OMPScheduleType::UnorderedBalanced:
111 case OMPScheduleType::UnorderedGuidedIterativeChunked:
112 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
113 case OMPScheduleType::UnorderedSteal:
114 case OMPScheduleType::UnorderedStaticBalancedChunked:
115 case OMPScheduleType::UnorderedGuidedSimd:
116 case OMPScheduleType::UnorderedRuntimeSimd:
117 case OMPScheduleType::OrderedStaticChunked:
118 case OMPScheduleType::OrderedStatic:
119 case OMPScheduleType::OrderedDynamicChunked:
120 case OMPScheduleType::OrderedGuidedChunked:
121 case OMPScheduleType::OrderedRuntime:
122 case OMPScheduleType::OrderedAuto:
// NOTE(review): "OrderdTrapezoidal" (missing 'e') is the enumerator's
// spelling in llvm/Frontend/OpenMP/OMPConstants.h upstream -- confirm
// against that header before "correcting" it here, or it will not compile.
123 case OMPScheduleType::OrderdTrapezoidal:
124 case OMPScheduleType::NomergeUnorderedStaticChunked:
125 case OMPScheduleType::NomergeUnorderedStatic:
126 case OMPScheduleType::NomergeUnorderedDynamicChunked:
127 case OMPScheduleType::NomergeUnorderedGuidedChunked:
128 case OMPScheduleType::NomergeUnorderedRuntime:
129 case OMPScheduleType::NomergeUnorderedAuto:
130 case OMPScheduleType::NomergeUnorderedTrapezoidal:
131 case OMPScheduleType::NomergeUnorderedGreedy:
132 case OMPScheduleType::NomergeUnorderedBalanced:
133 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
134 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
135 case OMPScheduleType::NomergeUnorderedSteal:
136 case OMPScheduleType::NomergeOrderedStaticChunked:
137 case OMPScheduleType::NomergeOrderedStatic:
138 case OMPScheduleType::NomergeOrderedDynamicChunked:
139 case OMPScheduleType::NomergeOrderedGuidedChunked:
140 case OMPScheduleType::NomergeOrderedRuntime:
141 case OMPScheduleType::NomergeOrderedAuto:
142 case OMPScheduleType::NomergeOrderedTrapezoidal:
143 case OMPScheduleType::OrderedDistributeChunked:
144 case OMPScheduleType::OrderedDistribute:
152 SchedType & OMPScheduleType::MonotonicityMask;
153 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
165 Builder.restoreIP(IP);
173 return T.isAMDGPU() ||
T.isNVPTX() ||
T.isSPIRV();
179 Kernel->getFnAttribute(
"target-features").getValueAsString();
180 if (Features.
count(
"+wavefrontsize64"))
195 bool HasSimdModifier,
bool HasDistScheduleChunks) {
// NOTE(review): fragment -- maps an OMP_SCHEDULE_* clause kind to the
// corresponding "base" OMPScheduleType (no ordering/monotonicity bits set).
197 switch (ClauseKind) {
// No schedule clause (Default) and schedule(static) share one mapping:
// the chunked variant is chosen only when an explicit chunk is present.
198 case OMP_SCHEDULE_Default:
199 case OMP_SCHEDULE_Static:
200 return HasChunks ? OMPScheduleType::BaseStaticChunked
201 : OMPScheduleType::BaseStatic;
202 case OMP_SCHEDULE_Dynamic:
203 return OMPScheduleType::BaseDynamicChunked;
// The simd modifier selects the *_Simd guided/runtime variants.
204 case OMP_SCHEDULE_Guided:
205 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
206 : OMPScheduleType::BaseGuidedChunked;
207 case OMP_SCHEDULE_Auto:
// NOTE(review): the return statement for the Auto case (original line 208)
// is not visible in this chunk.
209 case OMP_SCHEDULE_Runtime:
210 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
211 : OMPScheduleType::BaseRuntime;
// dist_schedule: chunked only when an explicit chunk expression was given.
212 case OMP_SCHEDULE_Distribute:
213 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
214 : OMPScheduleType::BaseDistribute;
222 bool HasOrderedClause) {
// NOTE(review): fragment -- ORs the ordered/unordered modifier into a base
// schedule type. The precondition below requires the input to carry no
// modifier bits yet.
223 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
224 OMPScheduleType::None &&
225 "Must not have ordering nor monotonicity flags already set");
228 ? OMPScheduleType::ModifierOrdered
229 : OMPScheduleType::ModifierUnordered;
230 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
// Special cases: the simd guided/runtime variants are remapped when ordered,
// presumably because no ordered simd enumerators exist -- confirm against
// OMPConstants.h.
233 if (OrderingScheduleType ==
234 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
235 return OMPScheduleType::OrderedGuidedChunked;
236 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
237 OMPScheduleType::ModifierOrdered))
238 return OMPScheduleType::OrderedRuntime;
240 return OrderingScheduleType;
246 bool HasSimdModifier,
bool HasMonotonic,
247 bool HasNonmonotonic,
bool HasOrderedClause) {
248 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
249 OMPScheduleType::None &&
250 "Must not have monotonicity flags already set");
251 assert((!HasMonotonic || !HasNonmonotonic) &&
252 "Monotonic and Nonmonotonic are contradicting each other");
255 return ScheduleType | OMPScheduleType::ModifierMonotonic;
256 }
else if (HasNonmonotonic) {
257 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
267 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
268 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
274 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
282 bool HasSimdModifier,
bool HasMonotonicModifier,
283 bool HasNonmonotonicModifier,
bool HasOrderedClause,
284 bool HasDistScheduleChunks) {
286 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
290 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
291 HasNonmonotonicModifier, HasOrderedClause);
304 if (
Instruction *Term = Source->getTerminatorOrNull()) {
313 NewBr->setDebugLoc(
DL);
318 assert(New->getFirstInsertionPt() == New->begin() &&
319 "Target BB must not have PHI nodes");
335 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
339 NewBr->setDebugLoc(
DL);
351 Builder.SetInsertPoint(Old);
355 Builder.SetCurrentDebugLocation(
DebugLoc);
365 New->replaceSuccessorsPhiUsesWith(Old, New);
374 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
376 Builder.SetInsertPoint(Builder.GetInsertBlock());
379 Builder.SetCurrentDebugLocation(
DebugLoc);
388 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
390 Builder.SetInsertPoint(Builder.GetInsertBlock());
393 Builder.SetCurrentDebugLocation(
DebugLoc);
410 const Twine &Name =
"",
bool AsPtr =
true,
411 bool Is64Bit =
false) {
412 Builder.restoreIP(OuterAllocaIP);
416 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
420 FakeVal = FakeValAddr;
422 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
427 Builder.restoreIP(InnerAllocaIP);
430 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
433 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
// Bitmask flags recording which OpenMP 'requires' directive clauses were
// seen; combined below with |= and queried with &.
446enum OpenMPOffloadingRequiresDirFlags {
// No requires clause has been registered yet.
448 OMP_REQ_UNDEFINED = 0x000,
// Requires directive seen, but with none of the flags below.
450 OMP_REQ_NONE = 0x001,
// requires reverse_offload
452 OMP_REQ_REVERSE_OFFLOAD = 0x002,
// requires unified_address
454 OMP_REQ_UNIFIED_ADDRESS = 0x004,
// requires unified_shared_memory
456 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
// requires dynamic_allocators
458 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
// Default constructor fragment: starts with no flags recorded.
465 : RequiresFlags(OMP_REQ_UNDEFINED) {}
// Constructor fragment: translates the individual boolean clause flags into
// the bitmask representation above.
469 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
470 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
473 RequiresFlags(OMP_REQ_UNDEFINED) {
474 if (HasRequiresReverseOffload)
475 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
476 if (HasRequiresUnifiedAddress)
477 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
478 if (HasRequiresUnifiedSharedMemory)
479 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
480 if (HasRequiresDynamicAllocators)
481 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
// Getter fragments: each tests one flag bit of RequiresFlags.
485 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
489 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
493 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
497 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
502 :
static_cast<int64_t
>(OMP_REQ_NONE);
507 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
509 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
514 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
516 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
521 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
523 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
528 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
530 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
543 constexpr size_t MaxDim = 3;
548 Value *DynCGroupMemFallbackFlag =
550 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
551 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
557 Value *NumThreads3D =
588 auto FnAttrs = Attrs.getFnAttrs();
589 auto RetAttrs = Attrs.getRetAttrs();
591 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
596 bool Param =
true) ->
void {
597 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
598 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
599 if (HasSignExt || HasZeroExt) {
600 assert(AS.getNumAttributes() == 1 &&
601 "Currently not handling extension attr combined with others.");
603 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
606 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
613#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
614#include "llvm/Frontend/OpenMP/OMPKinds.def"
618#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
620 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
621 addAttrSet(RetAttrs, RetAttrSet, false); \
622 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
623 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
624 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
626#include "llvm/Frontend/OpenMP/OMPKinds.def"
640#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
642 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
644 Fn = M.getFunction(Str); \
646#include "llvm/Frontend/OpenMP/OMPKinds.def"
652#define OMP_RTL(Enum, Str, ...) \
654 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
656#include "llvm/Frontend/OpenMP/OMPKinds.def"
660 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
670 LLVMContext::MD_callback,
672 2, {-1, -1},
true)}));
685 assert(Fn &&
"Failed to create OpenMP runtime function");
696 Builder.SetInsertPoint(FiniBB);
708 FiniBB = OtherFiniBB;
710 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
718 auto EndIt = FiniBB->end();
719 if (FiniBB->size() >= 1)
720 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
725 FiniBB->replaceAllUsesWith(OtherFiniBB);
726 FiniBB->eraseFromParent();
727 FiniBB = OtherFiniBB;
734 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
757 for (
auto Inst =
Block->getReverseIterator()->begin();
758 Inst !=
Block->getReverseIterator()->end();) {
787 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
812 ParallelRegionBlockSet.
clear();
814 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
824 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
833 ".omp_par", ArgsInZeroAddressSpace);
837 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
838 assert(Extractor.isEligible() &&
839 "Expected OpenMP outlining to be possible!");
841 for (
auto *V : OI.ExcludeArgsFromAggregate)
842 Extractor.excludeArgFromAggregate(V);
845 Extractor.extractCodeRegion(CEAC, OI.Inputs, OI.Outputs);
849 if (TargetCpuAttr.isStringAttribute())
852 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
853 if (TargetFeaturesAttr.isStringAttribute())
854 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
857 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
859 "OpenMP outlined functions should not return a value!");
864 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
871 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
878 "Expected instructions to add in the outlined region entry");
880 End = ArtificialEntry.
rend();
885 if (
I.isTerminator()) {
887 if (
Instruction *TI = OI.EntryBB->getTerminatorOrNull())
888 TI->adoptDbgRecords(&ArtificialEntry,
I.getIterator(),
false);
892 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
895 OI.EntryBB->moveBefore(&ArtificialEntry);
902 if (OI.PostOutlineCB)
903 OI.PostOutlineCB(*OutlinedFn);
905 if (OI.FixUpNonEntryAllocas)
937 errs() <<
"Error of kind: " << Kind
938 <<
" when emitting offload entries and metadata during "
939 "OMPIRBuilder finalization \n";
945 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
946 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
947 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
948 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
965 ConstantInt::get(I32Ty,
Value), Name);
978 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
982 if (UsedArray.
empty())
989 GV->setSection(
"llvm.metadata");
995 auto *Int8Ty =
Builder.getInt8Ty();
998 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1006 unsigned Reserve2Flags) {
1008 LocFlags |= OMP_IDENT_FLAG_KMPC;
1015 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1016 ConstantInt::get(Int32, Reserve2Flags),
1017 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1019 size_t SrcLocStrArgIdx = 4;
1020 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1024 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1031 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1032 if (
GV.getInitializer() == Initializer)
1037 M, OpenMPIRBuilder::Ident,
1040 M.getDataLayout().getDefaultGlobalsAddressSpace());
1052 SrcLocStrSize = LocStr.
size();
1061 if (
GV.isConstant() &&
GV.hasInitializer() &&
1062 GV.getInitializer() == Initializer)
1065 SrcLocStr =
Builder.CreateGlobalString(
1066 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1074 unsigned Line,
unsigned Column,
1080 Buffer.
append(FunctionName);
1082 Buffer.
append(std::to_string(Line));
1084 Buffer.
append(std::to_string(Column));
1092 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1103 if (
DIFile *DIF = DIL->getFile())
1104 if (std::optional<StringRef> Source = DIF->getSource())
1110 DIL->getColumn(), SrcLocStrSize);
1116 Loc.IP.getBlock()->getParent());
1122 "omp_global_thread_num");
1127 bool ForceSimpleCall,
bool CheckCancelFlag) {
1137 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1140 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1143 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1146 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1149 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1162 bool UseCancelBarrier =
1167 ? OMPRTL___kmpc_cancel_barrier
1168 : OMPRTL___kmpc_barrier),
1171 if (UseCancelBarrier && CheckCancelFlag)
1181 omp::Directive CanceledDirective) {
1186 auto *UI =
Builder.CreateUnreachable();
1194 Builder.SetInsertPoint(ElseTI);
1195 auto ElseIP =
Builder.saveIP();
1203 Builder.SetInsertPoint(ThenTI);
1205 Value *CancelKind =
nullptr;
1206 switch (CanceledDirective) {
1207#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1208 case DirectiveEnum: \
1209 CancelKind = Builder.getInt32(Value); \
1211#include "llvm/Frontend/OpenMP/OMPKinds.def"
1228 Builder.SetInsertPoint(UI->getParent());
1229 UI->eraseFromParent();
1236 omp::Directive CanceledDirective) {
1241 auto *UI =
Builder.CreateUnreachable();
1244 Value *CancelKind =
nullptr;
1245 switch (CanceledDirective) {
1246#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1247 case DirectiveEnum: \
1248 CancelKind = Builder.getInt32(Value); \
1250#include "llvm/Frontend/OpenMP/OMPKinds.def"
1267 Builder.SetInsertPoint(UI->getParent());
1268 UI->eraseFromParent();
1281 auto *KernelArgsPtr =
1282 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1287 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1290 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1294 NumThreads, HostPtr, KernelArgsPtr};
1321 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1325 Value *Return =
nullptr;
1345 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1346 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1353 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1355 auto CurFn =
Builder.GetInsertBlock()->getParent();
1362 emitBlock(OffloadContBlock, CurFn,
true);
1367 Value *CancelFlag, omp::Directive CanceledDirective) {
1369 "Unexpected cancellation!");
1389 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1398 Builder.SetInsertPoint(CancellationBlock);
1399 Builder.CreateBr(*FiniBBOrErr);
1402 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1421 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1424 "Expected at least tid and bounded tid as arguments");
1425 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1428 assert(CI &&
"Expected call instruction to outlined function");
1429 CI->
getParent()->setName(
"omp_parallel");
1431 Builder.SetInsertPoint(CI);
1432 Type *PtrTy = OMPIRBuilder->VoidPtr;
1436 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1440 Value *Args = ArgsAlloca;
1444 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1445 Builder.restoreIP(CurrentIP);
1448 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1450 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1452 Builder.CreateStore(V, StoreAddress);
1456 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1457 : Builder.getInt32(1);
1460 Value *Parallel60CallArgs[] = {
1464 NumThreads ? NumThreads : Builder.getInt32(-1),
1465 Builder.getInt32(-1),
1469 Builder.getInt64(NumCapturedVars),
1470 Builder.getInt32(0)};
1478 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1481 Builder.SetInsertPoint(PrivTID);
1483 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1490 I->eraseFromParent();
1513 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1521 F->addMetadata(LLVMContext::MD_callback,
1530 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1533 "Expected at least tid and bounded tid as arguments");
1534 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1537 CI->
getParent()->setName(
"omp_parallel");
1538 Builder.SetInsertPoint(CI);
1541 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1545 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1547 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1554 auto PtrTy = OMPIRBuilder->VoidPtr;
1555 if (IfCondition && NumCapturedVars == 0) {
1563 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1566 Builder.SetInsertPoint(PrivTID);
1568 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1575 I->eraseFromParent();
1583 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1592 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1593 (ProcBind != OMP_PROC_BIND_default);
1600 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1604 if (NumThreads && !
Config.isTargetDevice()) {
1607 Builder.CreateIntCast(NumThreads, Int32,
false)};
1612 if (ProcBind != OMP_PROC_BIND_default) {
1616 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1638 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1641 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1644 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1648 PointerType ::get(
M.getContext(), 0),
1649 "zero.addr.ascast");
1673 if (IP.getBlock()->end() == IP.getPoint()) {
1679 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1680 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1681 "Unexpected insertion point for finalization call!");
1693 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1699 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1717 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1720 assert(BodyGenCB &&
"Expected body generation callback!");
1722 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1725 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1728 if (
Config.isTargetDevice()) {
1731 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1733 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1734 ThreadID, ToBeDeletedVec);
1740 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1742 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1764 ".omp_par", ArgsInZeroAddressSpace);
1769 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1771 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1776 return GV->getValueType() == OpenMPIRBuilder::Ident;
1781 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1787 if (&V == TIDAddr || &V == ZeroAddr) {
1793 for (
Use &U : V.uses())
1795 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1805 if (!V.getType()->isPointerTy()) {
1809 Builder.restoreIP(OuterAllocaIP);
1811 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1815 Builder.SetInsertPoint(InsertBB,
1820 Builder.restoreIP(InnerAllocaIP);
1821 Inner =
Builder.CreateLoad(V.getType(), Ptr);
1824 Value *ReplacementValue =
nullptr;
1827 ReplacementValue = PrivTID;
1830 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
1838 assert(ReplacementValue &&
1839 "Expected copy/create callback to set replacement value!");
1840 if (ReplacementValue == &V)
1845 UPtr->set(ReplacementValue);
1870 for (
Value *Output : Outputs)
1874 "OpenMP outlining should not produce live-out values!");
1876 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1878 for (
auto *BB : Blocks)
1879 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1887 assert(FiniInfo.DK == OMPD_parallel &&
1888 "Unexpected finalization stack state!");
1899 Builder.CreateBr(*FiniBBOrErr);
1903 Term->eraseFromParent();
1909 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1910 UI->eraseFromParent();
1973 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1975 Builder.CreateStore(DepValPtr, Addr);
1978 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1980 ConstantInt::get(SizeTy,
1985 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Flags));
1987 static_cast<unsigned int>(Dep.
DepKind)),
2000 if (Dependencies.
empty())
2020 Type *DependInfo = OMPBuilder.DependInfo;
2022 Value *DepArray =
nullptr;
2024 Builder.SetInsertPoint(
2028 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2030 Builder.restoreIP(OldIP);
2032 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2034 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2041Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2043 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2058 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2062 "omp_taskloop_dup",
M);
2065 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2066 DestTaskArg->
setName(
"dest_task");
2067 SrcTaskArg->
setName(
"src_task");
2068 LastprivateFlagArg->
setName(
"lastprivate_flag");
2070 IRBuilderBase::InsertPointGuard Guard(
Builder);
2074 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2075 Type *TaskWithPrivatesTy =
2078 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2080 PrivatesTy, TaskPrivates,
2085 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2086 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2088 DestTaskContextPtr->
setName(
"destPtr");
2089 SrcTaskContextPtr->
setName(
"srcPtr");
2094 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2095 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2096 if (!AfterIPOrError)
2098 Builder.restoreIP(*AfterIPOrError);
2108 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2110 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2112 Value *TaskContextStructPtrVal) {
2117 uint32_t SrcLocStrSize;
2133 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP))
2136 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2141 llvm::CanonicalLoopInfo *CLI = result.
get();
2143 OI.
EntryBB = TaskloopAllocaBB;
2144 OI.OuterAllocaBB = AllocaIP.getBlock();
2145 OI.ExitBB = TaskloopExitBB;
2151 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2153 TaskloopAllocaIP,
"lb",
false,
true);
2155 TaskloopAllocaIP,
"ub",
false,
true);
2157 TaskloopAllocaIP,
"step",
false,
true);
2160 OI.Inputs.insert(FakeLB);
2161 OI.Inputs.insert(FakeUB);
2162 OI.Inputs.insert(FakeStep);
2163 if (TaskContextStructPtrVal)
2164 OI.Inputs.insert(TaskContextStructPtrVal);
2165 assert(((TaskContextStructPtrVal && DupCB) ||
2166 (!TaskContextStructPtrVal && !DupCB)) &&
2167 "Task context struct ptr and duplication callback must be both set "
2173 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2177 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2178 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2181 if (!TaskDupFnOrErr) {
2184 Value *TaskDupFn = *TaskDupFnOrErr;
2186 OI.PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2187 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2188 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2189 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2190 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2192 assert(OutlinedFn.hasOneUse() &&
2193 "there must be a single user for the outlined function");
2200 Value *CastedLBVal =
2201 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2202 Value *CastedUBVal =
2203 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2204 Value *CastedStepVal =
2205 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2207 Builder.SetInsertPoint(StaleCI);
2220 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2241 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2243 AllocaInst *ArgStructAlloca =
2245 assert(ArgStructAlloca &&
2246 "Unable to find the alloca instruction corresponding to arguments "
2247 "for extracted function");
2248 std::optional<TypeSize> ArgAllocSize =
2251 "Unable to determine size of arguments for extracted function");
2252 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2257 CallInst *TaskData =
Builder.CreateCall(
2258 TaskAllocFn, {Ident, ThreadID,
Flags,
2259 TaskSize, SharedsSize,
2264 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2265 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2270 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2273 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2276 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2282 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2288 Value *GrainSizeVal =
2289 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2291 Value *TaskDup = TaskDupFn;
2293 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2294 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2299 Builder.CreateCall(TaskloopFn, Args);
2306 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2311 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2313 LoadInst *SharedsOutlined =
2314 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2315 OutlinedFn.getArg(1)->replaceUsesWithIf(
2317 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2320 Type *IVTy =
IV->getType();
2326 Value *TaskLB =
nullptr;
2327 Value *TaskUB =
nullptr;
2328 Value *TaskStep =
nullptr;
2329 Value *LoadTaskLB =
nullptr;
2330 Value *LoadTaskUB =
nullptr;
2331 Value *LoadTaskStep =
nullptr;
2332 for (Instruction &
I : *TaskloopAllocaBB) {
2333 if (
I.getOpcode() == Instruction::GetElementPtr) {
2336 switch (CI->getZExtValue()) {
2348 }
else if (
I.getOpcode() == Instruction::Load) {
2350 if (
Load.getPointerOperand() == TaskLB) {
2351 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2353 }
else if (
Load.getPointerOperand() == TaskUB) {
2354 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2356 }
else if (
Load.getPointerOperand() == TaskStep) {
2357 assert(TaskStep !=
nullptr &&
"Expected value for TaskStep");
2363 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2365 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2366 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2367 assert(LoadTaskStep !=
nullptr &&
"Expected value for LoadTaskStep");
2369 Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
2370 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2371 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2372 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2374 CLI->setTripCount(CastedTripCount);
2376 Builder.SetInsertPoint(CLI->getBody(),
2377 CLI->getBody()->getFirstInsertionPt());
2379 if (NumOfCollapseLoops > 1) {
2385 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2388 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2389 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2390 User *IVUser = IVUse->getUser();
2392 if (
Op->getOpcode() == Instruction::URem ||
2393 Op->getOpcode() == Instruction::UDiv) {
2398 for (User *User : UsersToReplace) {
2399 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2416 assert(CLI->getIndVar()->getNumUses() == 3 &&
2417 "Canonical loop should have exactly three uses of the ind var");
2418 for (User *IVUser : CLI->getIndVar()->users()) {
2420 if (
Mul->getOpcode() == Instruction::Mul) {
2421 for (User *MulUser :
Mul->users()) {
2423 if (
Add->getOpcode() == Instruction::Add) {
2424 Add->setOperand(1, CastedTaskLB);
2433 FakeLB->replaceAllUsesWith(CastedLBVal);
2434 FakeUB->replaceAllUsesWith(CastedUBVal);
2435 FakeStep->replaceAllUsesWith(CastedStepVal);
2437 I->eraseFromParent();
2442 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2448 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2457 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2489 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
2500 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2502 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2503 Affinities, Mergeable, Priority, EventHandle,
2504 TaskAllocaBB, ToBeDeleted](
Function &OutlinedFn)
mutable {
2506 assert(OutlinedFn.hasOneUse() &&
2507 "there must be a single user for the outlined function");
2512 bool HasShareds = StaleCI->
arg_size() > 1;
2513 Builder.SetInsertPoint(StaleCI);
2538 Flags =
Builder.CreateOr(FinalFlag, Flags);
2551 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2560 assert(ArgStructAlloca &&
2561 "Unable to find the alloca instruction corresponding to arguments "
2562 "for extracted function");
2563 std::optional<TypeSize> ArgAllocSize =
2566 "Unable to determine size of arguments for extracted function");
2567 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2573 TaskAllocFn, {Ident, ThreadID, Flags,
2574 TaskSize, SharedsSize,
2577 if (Affinities.
Count && Affinities.
Info) {
2579 OMPRTL___kmpc_omp_reg_task_with_affinity);
2590 OMPRTL___kmpc_task_allow_completion_event);
2594 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2596 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2597 Builder.CreateStore(EventVal, EventHandleAddr);
2603 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2604 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2622 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2625 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2627 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2630 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2631 PriorityData, {Zero, Zero});
2632 Builder.CreateStore(Priority, CmplrData);
2635 Value *DepArray =
nullptr;
2636 Value *NumDeps =
nullptr;
2639 NumDeps = Dependencies.
NumDeps;
2640 }
else if (!Dependencies.
Deps.empty()) {
2642 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
2667 Builder.GetInsertPoint()->getParent()->getTerminator();
2668 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2669 Builder.SetInsertPoint(IfTerminator);
2672 Builder.SetInsertPoint(ElseTI);
2679 {Ident, ThreadID, NumDeps, DepArray,
2680 ConstantInt::get(
Builder.getInt32Ty(), 0),
2695 Builder.SetInsertPoint(ThenTI);
2703 {Ident, ThreadID, TaskData, NumDeps, DepArray,
2704 ConstantInt::get(
Builder.getInt32Ty(), 0),
2715 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2717 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2718 OutlinedFn.getArg(1)->replaceUsesWithIf(
2719 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2723 I->eraseFromParent();
2727 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2753 Builder.SetInsertPoint(TaskgroupExitBB);
2796 unsigned CaseNumber = 0;
2797 for (
auto SectionCB : SectionCBs) {
2799 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2801 Builder.SetInsertPoint(CaseBB);
2804 CaseEndBr->getIterator()}))
2815 Value *LB = ConstantInt::get(I32Ty, 0);
2816 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2817 Value *ST = ConstantInt::get(I32Ty, 1);
2819 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2824 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2825 WorksharingLoopType::ForStaticLoop, !IsNowait);
2831 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2835 assert(FiniInfo.DK == OMPD_sections &&
2836 "Unexpected finalization stack state!");
2837 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
2851 if (IP.getBlock()->end() != IP.getPoint())
2862 auto *CaseBB =
Loc.IP.getBlock();
2863 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2864 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2870 Directive OMPD = Directive::OMPD_sections;
2873 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2884Value *OpenMPIRBuilder::getGPUThreadID() {
2887 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2891Value *OpenMPIRBuilder::getGPUWarpSize() {
2896Value *OpenMPIRBuilder::getNVPTXWarpID() {
2897 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2898 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2901Value *OpenMPIRBuilder::getNVPTXLaneID() {
2902 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
2903 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2904 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2905 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
2912 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
2913 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
2914 assert(FromSize > 0 &&
"From size must be greater than zero");
2915 assert(ToSize > 0 &&
"To size must be greater than zero");
2916 if (FromType == ToType)
2918 if (FromSize == ToSize)
2919 return Builder.CreateBitCast(From, ToType);
2921 return Builder.CreateIntCast(From, ToType,
true);
2927 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2928 CastItem,
Builder.getPtrTy(0));
2929 Builder.CreateStore(From, ValCastItem);
2930 return Builder.CreateLoad(ToType, CastItem);
2937 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
2938 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2942 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2944 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
2946 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2947 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2948 Value *WarpSizeCast =
2950 Value *ShuffleCall =
2952 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2959 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
2971 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
2972 Value *ElemPtr = DstAddr;
2973 Value *Ptr = SrcAddr;
2974 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2978 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2981 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2982 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2986 if ((
Size / IntSize) > 1) {
2987 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
2988 SrcAddrGEP,
Builder.getPtrTy());
3005 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
3007 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
3010 Value *Res = createRuntimeShuffleFunction(
3013 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
3015 Builder.CreateAlignedStore(Res, ElemPtr,
3016 M.getDataLayout().getPrefTypeAlign(ElemType));
3018 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3019 Value *LocalElemPtr =
3020 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3026 Value *Res = createRuntimeShuffleFunction(
3027 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3030 Res =
Builder.CreateTrunc(Res, ElemType);
3031 Builder.CreateStore(Res, ElemPtr);
3032 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3034 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3040Error OpenMPIRBuilder::emitReductionListCopy(
3045 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3046 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3050 for (
auto En :
enumerate(ReductionInfos)) {
3052 Value *SrcElementAddr =
nullptr;
3053 AllocaInst *DestAlloca =
nullptr;
3054 Value *DestElementAddr =
nullptr;
3055 Value *DestElementPtrAddr =
nullptr;
3057 bool ShuffleInElement =
false;
3060 bool UpdateDestListPtr =
false;
3064 ReductionArrayTy, SrcBase,
3065 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3066 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3070 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3071 ReductionArrayTy, DestBase,
3072 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3073 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
3079 Type *DestAllocaType =
3080 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3081 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3082 ".omp.reduction.element");
3084 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3085 DestElementAddr = DestAlloca;
3088 DestElementAddr->
getName() +
".ascast");
3090 ShuffleInElement =
true;
3091 UpdateDestListPtr =
true;
3103 if (ShuffleInElement) {
3104 Type *ShuffleType = RI.ElementType;
3105 Value *ShuffleSrcAddr = SrcElementAddr;
3106 Value *ShuffleDestAddr = DestElementAddr;
3107 AllocaInst *LocalStorage =
nullptr;
3110 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3111 assert(RI.ByRefAllocatedType &&
3112 "Expected by-ref allocated type to be set");
3117 ShuffleType = RI.ByRefElementType;
3119 if (RI.DataPtrPtrGen) {
3122 Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3125 return GenResult.takeError();
3134 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3136 ShuffleDestAddr = LocalStorage;
3141 ShuffleDestAddr = DestElementAddr;
3145 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3146 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3148 if (IsByRefElem && RI.DataPtrPtrGen) {
3150 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3151 DestAlloca,
Builder.getPtrTy(),
".ascast");
3154 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3155 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3158 return GenResult.takeError();
3161 switch (RI.EvaluationKind) {
3163 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3165 Builder.CreateStore(Elem, DestElementAddr);
3169 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3170 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3172 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3174 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3176 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3178 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3179 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3180 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3181 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3182 Builder.CreateStore(SrcReal, DestRealPtr);
3183 Builder.CreateStore(SrcImg, DestImgPtr);
3188 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3190 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3191 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3203 if (UpdateDestListPtr) {
3204 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3205 DestElementAddr,
Builder.getPtrTy(),
3206 DestElementAddr->
getName() +
".ascast");
3207 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3214Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3218 LLVMContext &Ctx =
M.getContext();
3220 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3224 "_omp_reduction_inter_warp_copy_func", &
M);
3229 Builder.SetInsertPoint(EntryBB);
3246 StringRef TransferMediumName =
3247 "__openmp_nvptx_data_transfer_temporary_storage";
3248 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3249 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3251 if (!TransferMedium) {
3252 TransferMedium =
new GlobalVariable(
3260 Value *GPUThreadID = getGPUThreadID();
3262 Value *LaneID = getNVPTXLaneID();
3264 Value *WarpID = getNVPTXWarpID();
3268 Builder.GetInsertBlock()->getFirstInsertionPt());
3272 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3273 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3274 AllocaInst *NumWarpsAlloca =
3275 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3276 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3277 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3278 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3279 NumWarpsAlloca,
Builder.getPtrTy(0),
3280 NumWarpsAlloca->
getName() +
".ascast");
3281 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3282 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3291 for (
auto En :
enumerate(ReductionInfos)) {
3297 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3298 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3299 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3300 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3303 unsigned NumIters = RealTySize / TySize;
3306 Value *Cnt =
nullptr;
3307 Value *CntAddr =
nullptr;
3314 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3316 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3317 CntAddr->
getName() +
".ascast");
3329 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3330 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3337 omp::Directive::OMPD_unknown,
3341 return BarrierIP1.takeError();
3347 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3348 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3352 auto *RedListArrayTy =
3355 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3357 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3358 {ConstantInt::get(IndexTy, 0),
3359 ConstantInt::get(IndexTy, En.index())});
3363 if (IsByRefElem && RI.DataPtrPtrGen) {
3365 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3368 return GenRes.takeError();
3379 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3384 Builder.CreateStore(Elem, MediumPtr,
3396 omp::Directive::OMPD_unknown,
3400 return BarrierIP2.takeError();
3407 Value *NumWarpsVal =
3410 Value *IsActiveThread =
3411 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3412 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3419 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3421 Value *TargetElemPtrPtr =
3422 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3423 {ConstantInt::get(IndexTy, 0),
3424 ConstantInt::get(IndexTy, En.index())});
3425 Value *TargetElemPtrVal =
3427 Value *TargetElemPtr = TargetElemPtrVal;
3429 if (IsByRefElem && RI.DataPtrPtrGen) {
3431 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3434 return GenRes.takeError();
3436 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
3444 Value *SrcMediumValue =
3445 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3446 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3456 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3457 Builder.CreateStore(Cnt, CntAddr,
false);
3459 auto *CurFn =
Builder.GetInsertBlock()->getParent();
3463 RealTySize %= TySize;
3473Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3476 LLVMContext &Ctx =
M.getContext();
3477 FunctionType *FuncTy =
3479 {Builder.getPtrTy(), Builder.getInt16Ty(),
3480 Builder.getInt16Ty(), Builder.getInt16Ty()},
3484 "_omp_reduction_shuffle_and_reduce_func", &
M);
3494 Builder.SetInsertPoint(EntryBB);
3505 Type *ReduceListArgType = ReduceListArg->
getType();
3509 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3510 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3511 LaneIDArg->
getName() +
".addr");
3513 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3514 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3515 AlgoVerArg->
getName() +
".addr");
3522 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3524 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3525 ReduceListAlloca, ReduceListArgType,
3526 ReduceListAlloca->
getName() +
".ascast");
3527 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3528 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3529 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3530 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3531 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3532 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3533 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3534 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3535 RemoteReductionListAlloca,
Builder.getPtrTy(),
3536 RemoteReductionListAlloca->
getName() +
".ascast");
3538 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3539 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3540 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3541 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3543 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3544 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3545 Value *RemoteLaneOffset =
3546 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3547 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3554 Error EmitRedLsCpRes = emitReductionListCopy(
3556 ReduceList, RemoteListAddrCast, IsByRef,
3557 {RemoteLaneOffset,
nullptr,
nullptr});
3560 return EmitRedLsCpRes;
3585 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3590 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3591 Value *RemoteOffsetComp =
3593 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3594 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3595 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3601 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3603 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3604 ReduceList,
Builder.getPtrTy());
3605 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3606 RemoteListAddrCast,
Builder.getPtrTy());
3608 ->addFnAttr(Attribute::NoUnwind);
3619 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3620 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3625 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3629 EmitRedLsCpRes = emitReductionListCopy(
3631 RemoteListAddrCast, ReduceList, IsByRef);
3634 return EmitRedLsCpRes;
3649OpenMPIRBuilder::generateReductionDescriptor(
3651 Type *DescriptorType,
3657 Value *DescriptorSize =
3658 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3660 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3661 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3665 Value *DataPtrField;
3667 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3670 return GenResult.takeError();
3673 DataPtr,
Builder.getPtrTy(),
".ascast"),
3679Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3683 LLVMContext &Ctx =
M.getContext();
3686 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3690 "_omp_reduction_list_to_global_copy_func", &
M);
3697 Builder.SetInsertPoint(EntryBlock);
3707 BufferArg->
getName() +
".addr");
3711 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3712 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3713 BufferArgAlloca,
Builder.getPtrTy(),
3714 BufferArgAlloca->
getName() +
".ascast");
3715 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3716 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3717 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3718 ReduceListArgAlloca,
Builder.getPtrTy(),
3719 ReduceListArgAlloca->
getName() +
".ascast");
3721 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3722 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3723 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3725 Value *LocalReduceList =
3727 Value *BufferArgVal =
3731 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3732 for (
auto En :
enumerate(ReductionInfos)) {
3734 auto *RedListArrayTy =
3738 RedListArrayTy, LocalReduceList,
3739 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3745 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3747 ReductionsBufferTy, BufferVD, 0, En.index());
3749 switch (RI.EvaluationKind) {
3751 Value *TargetElement;
3753 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3754 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
3756 if (RI.DataPtrPtrGen) {
3758 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3761 return GenResult.takeError();
3765 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3768 Builder.CreateStore(TargetElement, GlobVal);
3772 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3773 RI.ElementType, ElemPtr, 0, 0,
".realp");
3775 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3777 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3779 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3781 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3782 RI.ElementType, GlobVal, 0, 0,
".realp");
3783 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3784 RI.ElementType, GlobVal, 0, 1,
".imagp");
3785 Builder.CreateStore(SrcReal, DestRealPtr);
3786 Builder.CreateStore(SrcImg, DestImgPtr);
3791 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
3793 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3794 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3805Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
3809 LLVMContext &Ctx =
M.getContext();
3812 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3816 "_omp_reduction_list_to_global_reduce_func", &
M);
3823 Builder.SetInsertPoint(EntryBlock);
3833 BufferArg->
getName() +
".addr");
3837 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3838 auto *RedListArrayTy =
3843 Value *LocalReduceList =
3844 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3848 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3849 BufferArgAlloca,
Builder.getPtrTy(),
3850 BufferArgAlloca->
getName() +
".ascast");
3851 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3852 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3853 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3854 ReduceListArgAlloca,
Builder.getPtrTy(),
3855 ReduceListArgAlloca->
getName() +
".ascast");
3856 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3857 LocalReduceList,
Builder.getPtrTy(),
3858 LocalReduceList->
getName() +
".ascast");
3860 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3861 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3862 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3867 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3868 for (
auto En :
enumerate(ReductionInfos)) {
3871 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
3872 RedListArrayTy, LocalReduceListAddrCast,
3873 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3875 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3877 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3878 ReductionsBufferTy, BufferVD, 0, En.index());
3880 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
3884 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
3885 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3886 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3893 Value *SrcElementPtrPtr =
3894 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3895 {ConstantInt::get(IndexTy, 0),
3896 ConstantInt::get(IndexTy, En.index())});
3897 Value *SrcDescriptorAddr =
3902 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
3903 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3906 return GenResult.takeError();
3908 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
3910 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3918 ->addFnAttr(Attribute::NoUnwind);
3924Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
3928 LLVMContext &Ctx =
M.getContext();
3931 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3935 "_omp_reduction_global_to_list_copy_func", &
M);
3942 Builder.SetInsertPoint(EntryBlock);
3952 BufferArg->
getName() +
".addr");
3956 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3957 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3958 BufferArgAlloca,
Builder.getPtrTy(),
3959 BufferArgAlloca->
getName() +
".ascast");
3960 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3961 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3962 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3963 ReduceListArgAlloca,
Builder.getPtrTy(),
3964 ReduceListArgAlloca->
getName() +
".ascast");
3965 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3966 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3967 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3969 Value *LocalReduceList =
3974 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3975 for (
auto En :
enumerate(ReductionInfos)) {
3976 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3977 auto *RedListArrayTy =
3981 RedListArrayTy, LocalReduceList,
3982 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3987 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3988 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
3989 ReductionsBufferTy, BufferVD, 0, En.index());
3995 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4002 return GenResult.takeError();
4008 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
4009 Builder.CreateStore(TargetElement, ElemPtr);
4013 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4022 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4024 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4026 Builder.CreateStore(SrcReal, DestRealPtr);
4027 Builder.CreateStore(SrcImg, DestImgPtr);
4034 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4035 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4047Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4051 LLVMContext &Ctx =
M.getContext();
4054 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4058 "_omp_reduction_global_to_list_reduce_func", &
M);
4065 Builder.SetInsertPoint(EntryBlock);
4075 BufferArg->
getName() +
".addr");
4079 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4085 Value *LocalReduceList =
4086 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4090 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4091 BufferArgAlloca,
Builder.getPtrTy(),
4092 BufferArgAlloca->
getName() +
".ascast");
4093 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4094 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4095 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4096 ReduceListArgAlloca,
Builder.getPtrTy(),
4097 ReduceListArgAlloca->
getName() +
".ascast");
4098 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4099 LocalReduceList,
Builder.getPtrTy(),
4100 LocalReduceList->
getName() +
".ascast");
4102 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4103 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4104 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4109 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4110 for (
auto En :
enumerate(ReductionInfos)) {
4113 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4114 RedListArrayTy, ReductionList,
4115 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4118 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4119 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4120 ReductionsBufferTy, BufferVD, 0, En.index());
4122 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4126 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4127 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4128 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4133 Value *ReduceListVal =
4135 Value *SrcElementPtrPtr =
4136 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4137 {ConstantInt::get(IndexTy, 0),
4138 ConstantInt::get(IndexTy, En.index())});
4139 Value *SrcDescriptorAddr =
4144 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4145 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4147 return GenResult.takeError();
4149 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4151 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4159 ->addFnAttr(Attribute::NoUnwind);
4165std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4166 std::string Suffix =
4168 return (Name + Suffix).str();
4171Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4174 AttributeList FuncAttrs) {
4176 {Builder.getPtrTy(), Builder.getPtrTy()},
4178 std::string
Name = getReductionFuncName(ReducerName);
4186 Builder.SetInsertPoint(EntryBB);
4190 Value *LHSArrayPtr =
nullptr;
4191 Value *RHSArrayPtr =
nullptr;
4198 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4200 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4201 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4202 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4203 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4204 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4205 Builder.CreateStore(Arg0, LHSAddrCast);
4206 Builder.CreateStore(Arg1, RHSAddrCast);
4207 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4208 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4212 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4214 for (
auto En :
enumerate(ReductionInfos)) {
4217 RedArrayTy, RHSArrayPtr,
4218 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4220 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4221 RHSI8Ptr, RI.PrivateVariable->getType(),
4222 RHSI8Ptr->
getName() +
".ascast");
4225 RedArrayTy, LHSArrayPtr,
4226 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4228 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4229 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4238 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4239 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4240 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4247 return AfterIP.takeError();
4248 if (!
Builder.GetInsertBlock())
4249 return ReductionFunc;
4253 if (!IsByRef.
empty() && !IsByRef[En.index()])
4254 Builder.CreateStore(Reduced, LHSPtr);
4259 for (
auto En :
enumerate(ReductionInfos)) {
4260 unsigned Index = En.index();
4262 Value *LHSFixupPtr, *RHSFixupPtr;
4263 Builder.restoreIP(RI.ReductionGenClang(
4264 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4269 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4274 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4288 return ReductionFunc;
4296 assert(RI.Variable &&
"expected non-null variable");
4297 assert(RI.PrivateVariable &&
"expected non-null private variable");
4298 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4299 "expected non-null reduction generator callback");
4302 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4303 "expected variables and their private equivalents to have the same "
4306 assert(RI.Variable->getType()->isPointerTy() &&
4307 "expected variables to be pointers");
4316 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4330 if (ReductionInfos.
size() == 0)
4340 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4344 AttributeList FuncAttrs;
4345 AttrBuilder AttrBldr(Ctx);
4347 AttrBldr.addAttribute(Attr);
4348 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4349 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4353 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4355 if (!ReductionResult)
4357 Function *ReductionFunc = *ReductionResult;
4361 if (GridValue.has_value())
4362 Config.setGridValue(GridValue.value());
4377 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4381 Value *ReductionListAlloca =
4382 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4383 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4384 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4387 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4388 for (
auto En :
enumerate(ReductionInfos)) {
4391 RedArrayTy, ReductionList,
4392 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4395 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4400 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4401 Builder.CreateStore(CastElem, ElemPtr);
4405 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4411 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4417 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4429 unsigned MaxDataSize = 0;
4431 for (
auto En :
enumerate(ReductionInfos)) {
4435 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4436 ? En.value().ByRefElementType
4437 : En.value().ElementType;
4438 auto Size =
M.getDataLayout().getTypeStoreSize(RedTypeArg);
4439 if (
Size > MaxDataSize)
4443 Value *ReductionDataSize =
4444 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4445 if (!IsTeamsReduction) {
4446 Value *SarFuncCast =
4447 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4449 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4450 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4453 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4458 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4460 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4463 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4468 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4473 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4478 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4485 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4487 Value *Args3[] = {SrcLocInfo,
4488 KernelTeamsReductionPtr,
4489 Builder.getInt32(ReductionBufNum),
4500 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4517 for (
auto En :
enumerate(ReductionInfos)) {
4525 Value *LHSPtr, *RHSPtr;
4527 &LHSPtr, &RHSPtr, CurFunc));
4540 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4542 "red.value." +
Twine(En.index()));
4553 if (!IsByRef.
empty() && !IsByRef[En.index()])
4558 if (ContinuationBlock) {
4559 Builder.CreateBr(ContinuationBlock);
4560 Builder.SetInsertPoint(ContinuationBlock);
4562 Config.setEmitLLVMUsed();
4573 ".omp.reduction.func", &M);
4583 Builder.SetInsertPoint(ReductionFuncBlock);
4584 Value *LHSArrayPtr =
nullptr;
4585 Value *RHSArrayPtr =
nullptr;
4596 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4598 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4599 Value *LHSAddrCast =
4600 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4601 Value *RHSAddrCast =
4602 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4603 Builder.CreateStore(Arg0, LHSAddrCast);
4604 Builder.CreateStore(Arg1, RHSAddrCast);
4605 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4606 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4608 LHSArrayPtr = ReductionFunc->
getArg(0);
4609 RHSArrayPtr = ReductionFunc->
getArg(1);
4612 unsigned NumReductions = ReductionInfos.
size();
4615 for (
auto En :
enumerate(ReductionInfos)) {
4617 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4618 RedArrayTy, LHSArrayPtr, 0, En.index());
4619 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4620 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4623 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4624 RedArrayTy, RHSArrayPtr, 0, En.index());
4625 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4626 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4635 Builder.restoreIP(*AfterIP);
4637 if (!Builder.GetInsertBlock())
4641 if (!IsByRef[En.index()])
4642 Builder.CreateStore(Reduced, LHSPtr);
4644 Builder.CreateRetVoid();
4651 bool IsNoWait,
bool IsTeamsReduction) {
4655 IsByRef, IsNoWait, IsTeamsReduction);
4662 if (ReductionInfos.
size() == 0)
4672 unsigned NumReductions = ReductionInfos.
size();
4675 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4677 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4679 for (
auto En :
enumerate(ReductionInfos)) {
4680 unsigned Index = En.index();
4682 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4683 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4690 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4700 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4705 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4706 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4708 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4710 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4711 : RuntimeFunction::OMPRTL___kmpc_reduce);
4714 {Ident, ThreadId, NumVariables, RedArraySize,
4715 RedArray, ReductionFunc, Lock},
4726 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4727 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4728 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4733 Builder.SetInsertPoint(NonAtomicRedBlock);
4734 for (
auto En :
enumerate(ReductionInfos)) {
4740 if (!IsByRef[En.index()]) {
4742 "red.value." +
Twine(En.index()));
4744 Value *PrivateRedValue =
4746 "red.private.value." +
Twine(En.index()));
4754 if (!
Builder.GetInsertBlock())
4757 if (!IsByRef[En.index()])
4761 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4762 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4764 Builder.CreateBr(ContinuationBlock);
4769 Builder.SetInsertPoint(AtomicRedBlock);
4770 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4777 if (!
Builder.GetInsertBlock())
4780 Builder.CreateBr(ContinuationBlock);
4793 if (!
Builder.GetInsertBlock())
4796 Builder.SetInsertPoint(ContinuationBlock);
4807 Directive OMPD = Directive::OMPD_master;
4812 Value *Args[] = {Ident, ThreadId};
4820 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4831 Directive OMPD = Directive::OMPD_masked;
4837 Value *ArgsEnd[] = {Ident, ThreadId};
4845 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4855 Call->setDoesNotThrow();
4870 bool IsInclusive,
ScanInfo *ScanRedInfo) {
4872 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4873 ScanVarsType, ScanRedInfo);
4884 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4887 Type *DestTy = ScanVarsType[i];
4888 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4891 Builder.CreateStore(Src, Val);
4896 Builder.GetInsertBlock()->getParent());
4899 IV = ScanRedInfo->
IV;
4902 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4905 Type *DestTy = ScanVarsType[i];
4907 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4909 Builder.CreateStore(Src, ScanVars[i]);
4923 Builder.GetInsertBlock()->getParent());
4928Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4932 Builder.restoreIP(AllocaIP);
4934 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4936 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4943 Builder.restoreIP(CodeGenIP);
4945 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
4946 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4950 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4951 AllocSpan,
nullptr,
"arr");
4952 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
4970 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
4979Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4985 Value *PrivateVar = RedInfo.PrivateVariable;
4986 Value *OrigVar = RedInfo.Variable;
4990 Type *SrcTy = RedInfo.ElementType;
4995 Builder.CreateStore(Src, OrigVar);
5018 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5043 Builder.GetInsertBlock()->getModule(),
5050 Builder.GetInsertBlock()->getModule(),
5056 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5057 Builder.SetInsertPoint(InputBB);
5060 Builder.SetInsertPoint(LoopBB);
5076 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5078 Builder.SetInsertPoint(InnerLoopBB);
5082 Value *ReductionVal = RedInfo.PrivateVariable;
5085 Type *DestTy = RedInfo.ElementType;
5088 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5091 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5096 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5099 Builder.CreateStore(Result, LHSPtr);
5102 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5104 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5105 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5108 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5114 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5135 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5142Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5154 Error Err = InputLoopGen();
5165 Error Err = ScanLoopGen(Builder.saveIP());
5172void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5209 Builder.SetInsertPoint(Preheader);
5212 Builder.SetInsertPoint(Header);
5213 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5214 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5219 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5220 Builder.CreateCondBr(Cmp, Body, Exit);
5225 Builder.SetInsertPoint(Latch);
5227 "omp_" + Name +
".next",
true);
5238 CL->Header = Header;
5257 NextBB, NextBB, Name);
5289 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5298 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5299 ScanRedInfo->
Span = TripCount;
5305 ScanRedInfo->
IV =
IV;
5306 createScanBBs(ScanRedInfo);
5309 assert(Terminator->getNumSuccessors() == 1);
5310 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5313 Builder.GetInsertBlock()->getParent());
5316 Builder.GetInsertBlock()->getParent());
5317 Builder.CreateBr(ContinueBlock);
5323 const auto &&InputLoopGen = [&]() ->
Error {
5325 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5326 ComputeIP, Name,
true, ScanRedInfo);
5330 Builder.restoreIP((*LoopInfo)->getAfterIP());
5336 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5340 Builder.restoreIP((*LoopInfo)->getAfterIP());
5344 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5352 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5362 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5363 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5367 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5383 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5386 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5390 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5395 Value *CountIfLooping;
5396 if (InclusiveStop) {
5397 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5403 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5406 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5407 "omp_" + Name +
".tripcount");
5412 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5419 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5426 ScanRedInfo->
IV = IndVar;
5427 return BodyGenCB(
Builder.saveIP(), IndVar);
5433 Builder.getCurrentDebugLocation());
5444 unsigned Bitwidth = Ty->getIntegerBitWidth();
5447 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5450 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5460 unsigned Bitwidth = Ty->getIntegerBitWidth();
5463 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5466 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5474 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5476 "Require dedicated allocate IP");
5482 uint32_t SrcLocStrSize;
5486 case WorksharingLoopType::ForStaticLoop:
5487 Flag = OMP_IDENT_FLAG_WORK_LOOP;
5489 case WorksharingLoopType::DistributeStaticLoop:
5490 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5492 case WorksharingLoopType::DistributeForStaticLoop:
5493 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
5500 Type *IVTy =
IV->getType();
5501 FunctionCallee StaticInit =
5502 LoopType == WorksharingLoopType::DistributeForStaticLoop
5505 FunctionCallee StaticFini =
5509 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5512 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5513 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5514 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5515 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5524 Constant *One = ConstantInt::get(IVTy, 1);
5525 Builder.CreateStore(Zero, PLowerBound);
5527 Builder.CreateStore(UpperBound, PUpperBound);
5528 Builder.CreateStore(One, PStride);
5534 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5535 ? OMPScheduleType::OrderedDistribute
5538 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5542 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5543 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5546 PLowerBound, PUpperBound});
5547 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5548 Value *PDistUpperBound =
5549 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5550 Args.push_back(PDistUpperBound);
5555 BuildInitCall(SchedulingType,
Builder);
5556 if (HasDistSchedule &&
5557 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5558 Constant *DistScheduleSchedType = ConstantInt::get(
5563 BuildInitCall(DistScheduleSchedType,
Builder);
5565 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5566 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5567 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5568 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5569 CLI->setTripCount(TripCount);
5575 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5579 return Builder.CreateAdd(OldIV, LowerBound);
5591 omp::Directive::OMPD_for,
false,
5594 return BarrierIP.takeError();
5621 Reachable.insert(
Block);
5631 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5635OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5639 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5640 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5645 Type *IVTy =
IV->getType();
5647 "Max supported tripcount bitwidth is 64 bits");
5649 :
Type::getInt64Ty(Ctx);
5652 Constant *One = ConstantInt::get(InternalIVTy, 1);
5658 for (BasicBlock &BB : *
F)
5659 if (!BB.hasTerminator())
5660 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5665 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5666 for (Instruction *
I : UIs)
5667 I->eraseFromParent();
5670 if (ChunkSize || DistScheduleChunkSize)
5675 FunctionCallee StaticInit =
5677 FunctionCallee StaticFini =
5683 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5684 Value *PLowerBound =
5685 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5686 Value *PUpperBound =
5687 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5688 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5697 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5698 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5699 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5700 "distschedulechunksize");
5701 Value *CastedTripCount =
5702 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5705 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5707 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5708 Builder.CreateStore(Zero, PLowerBound);
5709 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5710 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5712 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5713 Builder.CreateStore(UpperBound, PUpperBound);
5714 Builder.CreateStore(One, PStride);
5718 uint32_t SrcLocStrSize;
5721 if (DistScheduleSchedType != OMPScheduleType::None) {
5722 Flag |= OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5727 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5728 PUpperBound, PStride, One,
5729 this](
Value *SchedulingType,
Value *ChunkSize,
5732 StaticInit, {SrcLoc, ThreadNum,
5733 SchedulingType, PLastIter,
5734 PLowerBound, PUpperBound,
5738 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5739 if (DistScheduleSchedType != OMPScheduleType::None &&
5740 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5741 SchedType != OMPScheduleType::OrderedDistribute) {
5745 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5749 Value *FirstChunkStart =
5750 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5751 Value *FirstChunkStop =
5752 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5753 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5755 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5756 Value *NextChunkStride =
5757 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5761 Value *DispatchCounter;
5769 DispatchCounter = Counter;
5772 FirstChunkStart, CastedTripCount, NextChunkStride,
5795 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
5796 Value *IsLastChunk =
5797 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
5798 Value *CountUntilOrigTripCount =
5799 Builder.CreateSub(CastedTripCount, DispatchCounter);
5801 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
5802 Value *BackcastedChunkTC =
5803 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
5804 CLI->setTripCount(BackcastedChunkTC);
5809 Value *BackcastedDispatchCounter =
5810 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
5811 CLI->mapIndVar([&](Instruction *) ->
Value * {
5813 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
5826 return AfterIP.takeError();
5841static FunctionCallee
5844 unsigned Bitwidth = Ty->getIntegerBitWidth();
5847 case WorksharingLoopType::ForStaticLoop:
5850 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5853 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5855 case WorksharingLoopType::DistributeStaticLoop:
5858 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5861 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5863 case WorksharingLoopType::DistributeForStaticLoop:
5866 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5869 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
5872 if (Bitwidth != 32 && Bitwidth != 64) {
5884 Function &LoopBodyFn,
bool NoLoop) {
5895 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5896 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5897 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5898 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5903 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5904 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5908 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5909 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5910 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5911 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5912 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5914 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5938 Builder.restoreIP({Preheader, Preheader->
end()});
5941 Builder.CreateBr(CLI->
getExit());
5949 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5957 "Expected unique undroppable user of outlined function");
5959 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5961 "Expected outlined function call to be located in loop preheader");
5963 if (OutlinedFnCallInstruction->
arg_size() > 1)
5970 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5972 for (
auto &ToBeDeletedItem : ToBeDeleted)
5973 ToBeDeletedItem->eraseFromParent();
5980 uint32_t SrcLocStrSize;
5984 case WorksharingLoopType::ForStaticLoop:
5985 Flag = OMP_IDENT_FLAG_WORK_LOOP;
5987 case WorksharingLoopType::DistributeStaticLoop:
5988 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5990 case WorksharingLoopType::DistributeForStaticLoop:
5991 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
6001 SmallVector<Instruction *, 4> ToBeDeleted;
6003 OI.OuterAllocaBB = AllocaIP.getBlock();
6026 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
6028 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
6030 CodeExtractorAnalysisCache CEAC(*OuterFn);
6031 CodeExtractor Extractor(Blocks,
6044 SetVector<Value *> SinkingCands, HoistingCands;
6048 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
6055 for (
auto Use :
Users) {
6057 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
6058 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
6064 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6071 OI.PostOutlineCB = [=, ToBeDeletedVec =
6072 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6082 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6083 bool HasSimdModifier,
bool HasMonotonicModifier,
6084 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6086 Value *DistScheduleChunkSize) {
6087 if (
Config.isTargetDevice())
6088 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6090 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6091 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6093 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6094 OMPScheduleType::ModifierOrdered;
6096 if (HasDistSchedule) {
6097 DistScheduleSchedType = DistScheduleChunkSize
6098 ? OMPScheduleType::OrderedDistributeChunked
6099 : OMPScheduleType::OrderedDistribute;
6101 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6102 case OMPScheduleType::BaseStatic:
6103 case OMPScheduleType::BaseDistribute:
6104 assert((!ChunkSize || !DistScheduleChunkSize) &&
6105 "No chunk size with static-chunked schedule");
6106 if (IsOrdered && !HasDistSchedule)
6107 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6108 NeedsBarrier, ChunkSize);
6110 if (DistScheduleChunkSize)
6111 return applyStaticChunkedWorkshareLoop(
6112 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6113 DistScheduleChunkSize, DistScheduleSchedType);
6114 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6117 case OMPScheduleType::BaseStaticChunked:
6118 case OMPScheduleType::BaseDistributeChunked:
6119 if (IsOrdered && !HasDistSchedule)
6120 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6121 NeedsBarrier, ChunkSize);
6123 return applyStaticChunkedWorkshareLoop(
6124 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6125 DistScheduleChunkSize, DistScheduleSchedType);
6127 case OMPScheduleType::BaseRuntime:
6128 case OMPScheduleType::BaseAuto:
6129 case OMPScheduleType::BaseGreedy:
6130 case OMPScheduleType::BaseBalanced:
6131 case OMPScheduleType::BaseSteal:
6132 case OMPScheduleType::BaseRuntimeSimd:
6134 "schedule type does not support user-defined chunk sizes");
6136 case OMPScheduleType::BaseGuidedSimd:
6137 case OMPScheduleType::BaseDynamicChunked:
6138 case OMPScheduleType::BaseGuidedChunked:
6139 case OMPScheduleType::BaseGuidedIterativeChunked:
6140 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6141 case OMPScheduleType::BaseStaticBalancedChunked:
6142 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6143 NeedsBarrier, ChunkSize);
6156 unsigned Bitwidth = Ty->getIntegerBitWidth();
6159 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6162 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6170static FunctionCallee
6172 unsigned Bitwidth = Ty->getIntegerBitWidth();
6175 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6178 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6185static FunctionCallee
6187 unsigned Bitwidth = Ty->getIntegerBitWidth();
6190 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6193 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6198OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6201 bool NeedsBarrier,
Value *Chunk) {
6202 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6204 "Require dedicated allocate IP");
6206 "Require valid schedule type");
6208 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6209 OMPScheduleType::ModifierOrdered;
6214 uint32_t SrcLocStrSize;
6221 Type *IVTy =
IV->getType();
6226 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6228 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6229 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6230 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6231 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6240 Constant *One = ConstantInt::get(IVTy, 1);
6241 Builder.CreateStore(One, PLowerBound);
6243 Builder.CreateStore(UpperBound, PUpperBound);
6244 Builder.CreateStore(One, PStride);
6262 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6274 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6277 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6278 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6281 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6282 Builder.CreateCondBr(MoreWork, Header, Exit);
6288 PI->setIncomingBlock(0, OuterCond);
6289 PI->setIncomingValue(0, LowerBound);
6294 Br->setSuccessor(OuterCond);
6300 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6303 CI->setOperand(1, UpperBound);
6307 assert(BI->getSuccessor(1) == Exit);
6308 BI->setSuccessor(1, OuterCond);
6322 omp::Directive::OMPD_for,
false,
6325 return BarrierIP.takeError();
6344 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
6349 if (BBsToErase.
count(UseInst->getParent()))
6356 while (BBsToErase.
remove_if(HasRemainingUses)) {
6367 assert(
Loops.size() >= 1 &&
"At least one loop required");
6368 size_t NumLoops =
Loops.size();
6372 return Loops.front();
6384 Loop->collectControlBlocks(OldControlBBs);
6388 if (ComputeIP.
isSet())
6395 Value *CollapsedTripCount =
nullptr;
6398 "All loops to collapse must be valid canonical loops");
6399 Value *OrigTripCount = L->getTripCount();
6400 if (!CollapsedTripCount) {
6401 CollapsedTripCount = OrigTripCount;
6406 CollapsedTripCount =
6407 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6413 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6419 Builder.restoreIP(Result->getBodyIP());
6421 Value *Leftover = Result->getIndVar();
6423 NewIndVars.
resize(NumLoops);
6424 for (
int i = NumLoops - 1; i >= 1; --i) {
6425 Value *OrigTripCount =
Loops[i]->getTripCount();
6427 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6428 NewIndVars[i] = NewIndVar;
6430 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6433 NewIndVars[0] = Leftover;
6442 BasicBlock *ContinueBlock = Result->getBody();
6444 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6451 ContinueBlock =
nullptr;
6452 ContinuePred = NextSrc;
6459 for (
size_t i = 0; i < NumLoops - 1; ++i)
6460 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6466 for (
size_t i = NumLoops - 1; i > 0; --i)
6467 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6470 ContinueWith(Result->getLatch(),
nullptr);
6477 for (
size_t i = 0; i < NumLoops; ++i)
6478 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6492std::vector<CanonicalLoopInfo *>
6496 "Must pass as many tile sizes as there are loops");
6497 int NumLoops =
Loops.size();
6498 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6510 Loop->collectControlBlocks(OldControlBBs);
6518 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6519 OrigTripCounts.
push_back(L->getTripCount());
6530 for (
int i = 0; i < NumLoops - 1; ++i) {
6543 for (
int i = 0; i < NumLoops; ++i) {
6545 Value *OrigTripCount = OrigTripCounts[i];
6558 Value *FloorTripOverflow =
6559 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6561 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6562 Value *FloorTripCount =
6563 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6564 "omp_floor" +
Twine(i) +
".tripcount",
true);
6567 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6573 std::vector<CanonicalLoopInfo *> Result;
6574 Result.reserve(NumLoops * 2);
6587 auto EmbeddNewLoop =
6588 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6591 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6596 Enter = EmbeddedLoop->
getBody();
6598 OutroInsertBefore = EmbeddedLoop->
getLatch();
6599 return EmbeddedLoop;
6603 const Twine &NameBase) {
6606 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6607 Result.push_back(EmbeddedLoop);
6611 EmbeddNewLoops(FloorCount,
"floor");
6617 for (
int i = 0; i < NumLoops; ++i) {
6621 Value *FloorIsEpilogue =
6623 Value *TileTripCount =
6630 EmbeddNewLoops(TileCounts,
"tile");
6635 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6644 BodyEnter =
nullptr;
6645 BodyEntered = ExitBB;
6657 Builder.restoreIP(Result.back()->getBodyIP());
6658 for (
int i = 0; i < NumLoops; ++i) {
6661 Value *OrigIndVar = OrigIndVars[i];
6689 if (Properties.
empty())
6712 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6716 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6724 if (
I.mayReadOrWriteMemory()) {
6728 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6742 Loop->collectControlBlocks(oldControlBBs);
6747 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6748 origTripCounts.
push_back(L->getTripCount());
6757 Builder.SetInsertPoint(TCBlock);
6758 Value *fusedTripCount =
nullptr;
6760 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
6761 Value *origTripCount = L->getTripCount();
6762 if (!fusedTripCount) {
6763 fusedTripCount = origTripCount;
6766 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
6767 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
6781 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6782 Loops[i]->getPreheader()->moveBefore(TCBlock);
6783 Loops[i]->getAfter()->moveBefore(TCBlock);
6787 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6799 for (
size_t i = 0; i <
Loops.size(); ++i) {
6801 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
6802 Builder.SetInsertPoint(condBlock);
6810 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
6811 Builder.SetInsertPoint(condBBs[i]);
6812 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
6828 "omp.fused.pre_latch");
6861 const Twine &NamePrefix) {
6890 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
6892 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
6895 Builder.SetInsertPoint(SplitBeforeIt);
6897 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
6900 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
6903 Builder.SetInsertPoint(ElseBlock);
6909 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
6911 ExistingBlocks.
append(L->block_begin(), L->block_end());
6917 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
6919 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
6926 if (
Block == ThenBlock)
6927 NewBB->
setName(NamePrefix +
".if.else");
6930 VMap[
Block] = NewBB;
6938 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
6939 NamePrefix +
".pre_latch");
6943 L->addBasicBlockToLoop(ThenBlock, LI);
6949 if (TargetTriple.
isX86()) {
6950 if (Features.
lookup(
"avx512f"))
6952 else if (Features.
lookup(
"avx"))
6956 if (TargetTriple.
isPPC())
6958 if (TargetTriple.
isWasm())
6965 Value *IfCond, OrderKind Order,
6975 if (!BB.hasTerminator())
6991 I->eraseFromParent();
6994 if (AlignedVars.
size()) {
6996 for (
auto &AlignedItem : AlignedVars) {
6997 Value *AlignedPtr = AlignedItem.first;
6998 Value *Alignment = AlignedItem.second;
7001 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
7009 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
7022 Reachable.insert(
Block);
7032 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
7048 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
7050 if (Simdlen || Safelen) {
7054 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7080static std::unique_ptr<TargetMachine>
7084 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7085 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7096 std::nullopt, OptLevel));
7114 if (!BB.hasTerminator())
7127 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7128 FAM.registerPass([&]() {
return TIRA; });
7142 I->eraseFromParent();
7145 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7150 nullptr, ORE,
static_cast<int>(OptLevel),
7171 <<
" Threshold=" << UP.
Threshold <<
"\n"
7174 <<
" PartialOptSizeThreshold="
7194 Ptr = Load->getPointerOperand();
7196 Ptr = Store->getPointerOperand();
7203 if (Alloca->getParent() == &
F->getEntryBlock())
7223 int MaxTripCount = 0;
7224 bool MaxOrZero =
false;
7225 unsigned TripMultiple = 0;
7228 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7229 unsigned Factor = UP.
Count;
7230 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7241 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7257 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7270 *UnrolledCLI =
Loop;
7275 "unrolling only makes sense with a factor of 2 or larger");
7277 Type *IndVarTy =
Loop->getIndVarType();
7284 std::vector<CanonicalLoopInfo *>
LoopNest =
7299 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7302 (*UnrolledCLI)->assertOK();
7320 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7339 if (!CPVars.
empty()) {
7344 Directive OMPD = Directive::OMPD_single;
7349 Value *Args[] = {Ident, ThreadId};
7358 if (
Error Err = FiniCB(IP))
7379 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7386 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7389 ConstantInt::get(Int64, 0), CPVars[
I],
7392 }
else if (!IsNowait) {
7395 omp::Directive::OMPD_unknown,
false,
7410 Directive OMPD = Directive::OMPD_critical;
7415 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7416 Value *Args[] = {Ident, ThreadId, LockVar};
7433 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7441 const Twine &Name,
bool IsDependSource) {
7445 "OpenMP runtime requires depend vec with i64 type");
7458 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7472 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7490 Directive OMPD = Directive::OMPD_ordered;
7499 Value *Args[] = {Ident, ThreadId};
7509 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7516 bool HasFinalize,
bool IsCancellable) {
7523 BasicBlock *EntryBB = Builder.GetInsertBlock();
7532 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7543 "Unexpected control flow graph state!!");
7545 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7547 return AfterIP.takeError();
7552 "Unexpected Insertion point location!");
7555 auto InsertBB = merged ? ExitPredBB : ExitBB;
7558 Builder.SetInsertPoint(InsertBB);
7560 return Builder.saveIP();
7564 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7566 if (!Conditional || !EntryCall)
7572 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7582 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7586 UI->eraseFromParent();
7594 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7602 "Unexpected finalization stack state!");
7605 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7607 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7608 return std::move(Err);
7612 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7622 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7656 "copyin.not.master.end");
7663 Builder.SetInsertPoint(OMP_Entry);
7664 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7665 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7666 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7667 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7669 Builder.SetInsertPoint(CopyBegin);
7687 Value *Args[] = {ThreadId,
Size, Allocator};
7710 return Builder.CreateCall(Fn, Args, Name);
7724 Value *Args[] = {ThreadId, Addr, Allocator};
7732 Value *DependenceAddress,
bool HaveNowaitClause) {
7740 if (Device ==
nullptr)
7742 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
7743 if (NumDependences ==
nullptr) {
7744 NumDependences = ConstantInt::get(Int32, 0);
7748 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7750 Ident, ThreadId, InteropVar, InteropTypeVal,
7751 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
7760 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
7768 if (Device ==
nullptr)
7770 if (NumDependences ==
nullptr) {
7771 NumDependences = ConstantInt::get(Int32, 0);
7775 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7777 Ident, ThreadId, InteropVar, Device,
7778 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7787 Value *NumDependences,
7788 Value *DependenceAddress,
7789 bool HaveNowaitClause) {
7796 if (Device ==
nullptr)
7798 if (NumDependences ==
nullptr) {
7799 NumDependences = ConstantInt::get(Int32, 0);
7803 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7805 Ident, ThreadId, InteropVar, Device,
7806 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7836 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
7837 "expected num_threads and num_teams to be specified");
7856 const std::string DebugPrefix =
"_debug__";
7857 if (KernelName.
ends_with(DebugPrefix)) {
7858 KernelName = KernelName.
drop_back(DebugPrefix.length());
7859 Kernel =
M.getFunction(KernelName);
7865 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
7870 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
7871 if (MaxThreadsVal < 0) {
7877 MaxThreadsVal = Attrs.MinThreads;
7881 if (MaxThreadsVal > 0)
7894 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
7897 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
7898 Constant *DynamicEnvironmentInitializer =
7902 DynamicEnvironmentInitializer, DynamicEnvironmentName,
7904 DL.getDefaultGlobalsAddressSpace());
7908 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
7909 ? DynamicEnvironmentGV
7911 DynamicEnvironmentPtr);
7914 ConfigurationEnvironment, {
7915 UseGenericStateMachineVal,
7916 MayUseNestedParallelismVal,
7923 ReductionBufferLength,
7926 KernelEnvironment, {
7927 ConfigurationEnvironmentInitializer,
7931 std::string KernelEnvironmentName =
7932 (KernelName +
"_kernel_environment").str();
7935 KernelEnvironmentInitializer, KernelEnvironmentName,
7937 DL.getDefaultGlobalsAddressSpace());
7941 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
7942 ? KernelEnvironmentGV
7944 KernelEnvironmentPtr);
7945 Value *KernelLaunchEnvironment =
7948 KernelLaunchEnvironment =
7949 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
7950 ? KernelLaunchEnvironment
7951 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
7952 KernelLaunchEnvParamTy);
7954 Fn, {KernelEnvironment, KernelLaunchEnvironment});
7966 auto *UI =
Builder.CreateUnreachable();
7972 Builder.SetInsertPoint(WorkerExitBB);
7976 Builder.SetInsertPoint(CheckBBTI);
7977 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
7979 CheckBBTI->eraseFromParent();
7980 UI->eraseFromParent();
7988 int32_t TeamsReductionDataSize,
7989 int32_t TeamsReductionBufferLength) {
7994 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7998 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
8004 const std::string DebugPrefix =
"_debug__";
8006 KernelName = KernelName.
drop_back(DebugPrefix.length());
8007 auto *KernelEnvironmentGV =
8008 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
8009 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
8010 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
8012 KernelEnvironmentInitializer,
8013 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
8015 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
8017 KernelEnvironmentGV->setInitializer(NewInitializer);
8022 if (
Kernel.hasFnAttribute(Name)) {
8023 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
8029std::pair<int32_t, int32_t>
8031 int32_t ThreadLimit =
8032 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
8035 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
8036 if (!Attr.isValid() || !Attr.isStringAttribute())
8037 return {0, ThreadLimit};
8038 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
8041 return {0, ThreadLimit};
8042 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
8050 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
8052 return {0, ThreadLimit};
8058 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
8061 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
8069std::pair<int32_t, int32_t>
8072 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
8076 int32_t LB, int32_t UB) {
8083 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
8086void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
8095 else if (
T.isNVPTX())
8097 else if (
T.isSPIRV())
8102Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8103 StringRef EntryFnIDName) {
8104 if (
Config.isTargetDevice()) {
8105 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8109 return new GlobalVariable(
8114Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8115 StringRef EntryFnName) {
8119 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8120 "Named kernel already exists?");
8121 return new GlobalVariable(
8134 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8138 OutlinedFn = *CBResult;
8140 OutlinedFn =
nullptr;
8146 if (!IsOffloadEntry)
8149 std::string EntryFnIDName =
8151 ? std::string(EntryFnName)
8155 EntryFnName, EntryFnIDName);
8163 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8164 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8165 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8167 EntryInfo, EntryAddr, OutlinedFnID,
8169 return OutlinedFnID;
8186 bool IsStandAlone = !BodyGenCB;
8193 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8195 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8196 true, DeviceAddrCB))
8203 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8213 SrcLocInfo, DeviceID,
8220 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8224 if (Info.HasNoWait) {
8234 if (Info.HasNoWait) {
8238 emitBlock(OffloadContBlock, CurFn,
true);
8244 bool RequiresOuterTargetTask = Info.HasNoWait;
8245 if (!RequiresOuterTargetTask)
8246 cantFail(TaskBodyCB(
nullptr,
nullptr,
8250 {}, RTArgs, Info.HasNoWait));
8253 omp::OMPRTL___tgt_target_data_begin_mapper);
8257 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8261 Builder.CreateStore(LI, DeviceMap.second.second);
8297 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8306 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8328 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8329 return BeginThenGen(AllocaIP,
Builder.saveIP());
8344 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8345 return EndThenGen(AllocaIP,
Builder.saveIP());
8348 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8349 return BeginThenGen(AllocaIP,
Builder.saveIP());
8360 bool IsGPUDistribute) {
8361 assert((IVSize == 32 || IVSize == 64) &&
8362 "IV size is not compatible with the omp runtime");
8364 if (IsGPUDistribute)
8366 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8367 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8368 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8369 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8371 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8372 : omp::OMPRTL___kmpc_for_static_init_4u)
8373 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8374 : omp::OMPRTL___kmpc_for_static_init_8u);
8381 assert((IVSize == 32 || IVSize == 64) &&
8382 "IV size is not compatible with the omp runtime");
8384 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8385 : omp::OMPRTL___kmpc_dispatch_init_4u)
8386 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8387 : omp::OMPRTL___kmpc_dispatch_init_8u);
8394 assert((IVSize == 32 || IVSize == 64) &&
8395 "IV size is not compatible with the omp runtime");
8397 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8398 : omp::OMPRTL___kmpc_dispatch_next_4u)
8399 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8400 : omp::OMPRTL___kmpc_dispatch_next_8u);
8407 assert((IVSize == 32 || IVSize == 64) &&
8408 "IV size is not compatible with the omp runtime");
8410 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8411 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8412 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8413 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8424 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8432 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8436 if (NewVar && (arg == NewVar->
getArg()))
8446 auto UpdateDebugRecord = [&](
auto *DR) {
8449 for (
auto Loc : DR->location_ops()) {
8450 auto Iter = ValueReplacementMap.find(
Loc);
8451 if (Iter != ValueReplacementMap.end()) {
8452 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8453 ArgNo = std::get<1>(Iter->second) + 1;
8457 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8462 if (DVR->getNumVariableLocationOps() != 1u) {
8463 DVR->setKillLocation();
8466 Value *
Loc = DVR->getVariableLocationOp(0u);
8473 RequiredBB = &DVR->getFunction()->getEntryBlock();
8475 if (RequiredBB && RequiredBB != CurBB) {
8487 "Unexpected debug intrinsic");
8489 UpdateDebugRecord(&DVR);
8490 MoveDebugRecordToCorrectBlock(&DVR);
8493 for (
auto *DVR : DVRsToDelete)
8494 DVR->getMarker()->MarkedInstr->dropOneDbgRecord(DVR);
8498 Module *M = Func->getParent();
8501 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8502 unsigned ArgNo = Func->arg_size();
8504 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8505 false, DINode::DIFlags::FlagArtificial);
8507 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8508 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8529 for (
auto &Arg : Inputs)
8530 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8534 for (
auto &Arg : Inputs)
8535 ParameterTypes.
push_back(Arg->getType());
8543 auto BB = Builder.GetInsertBlock();
8544 auto M = BB->getModule();
8555 if (TargetCpuAttr.isStringAttribute())
8556 Func->addFnAttr(TargetCpuAttr);
8558 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8559 if (TargetFeaturesAttr.isStringAttribute())
8560 Func->addFnAttr(TargetFeaturesAttr);
8565 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8576 Builder.SetInsertPoint(EntryBB);
8582 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8592 splitBB(Builder,
true,
"outlined.body");
8598 Builder.restoreIP(*AfterIP);
8603 Builder.CreateRetVoid();
8607 auto AllocaIP = Builder.saveIP();
8612 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
8644 if (Instr->getFunction() == Func)
8645 Instr->replaceUsesOfWith(
Input, InputCopy);
8651 for (
auto InArg :
zip(Inputs, ArgRange)) {
8653 Argument &Arg = std::get<1>(InArg);
8654 Value *InputCopy =
nullptr;
8657 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
8660 Builder.restoreIP(*AfterIP);
8661 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8681 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
8688 ReplaceValue(
Input, InputCopy, Func);
8692 for (
auto Deferred : DeferredReplacement)
8693 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
8696 ValueReplacementMap);
8704 Value *TaskWithPrivates,
8705 Type *TaskWithPrivatesTy) {
8707 Type *TaskTy = OMPIRBuilder.Task;
8710 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
8711 Value *Shareds = TaskT;
8721 if (TaskWithPrivatesTy != TaskTy)
8722 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
8739 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
8744 assert((!NumOffloadingArrays || PrivatesTy) &&
8745 "PrivatesTy cannot be nullptr when there are offloadingArrays"
8778 Type *TaskPtrTy = OMPBuilder.TaskPtr;
8779 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
8785 ".omp_target_task_proxy_func",
8786 Builder.GetInsertBlock()->getModule());
8787 Value *ThreadId = ProxyFn->getArg(0);
8788 Value *TaskWithPrivates = ProxyFn->getArg(1);
8789 ThreadId->
setName(
"thread.id");
8790 TaskWithPrivates->
setName(
"task");
8792 bool HasShareds = SharedArgsOperandNo > 0;
8793 bool HasOffloadingArrays = NumOffloadingArrays > 0;
8796 Builder.SetInsertPoint(EntryBB);
8802 if (HasOffloadingArrays) {
8803 assert(TaskTy != TaskWithPrivatesTy &&
8804 "If there are offloading arrays to pass to the target"
8805 "TaskTy cannot be the same as TaskWithPrivatesTy");
8808 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
8809 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
8811 Builder.CreateStructGEP(PrivatesTy, Privates, i));
8815 auto *ArgStructAlloca =
8817 assert(ArgStructAlloca &&
8818 "Unable to find the alloca instruction corresponding to arguments "
8819 "for extracted function");
8821 std::optional<TypeSize> ArgAllocSize =
8823 assert(ArgStructType && ArgAllocSize &&
8824 "Unable to determine size of arguments for extracted function");
8825 uint64_t StructSize = ArgAllocSize->getFixedValue();
8828 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
8830 Value *SharedsSize = Builder.getInt64(StructSize);
8833 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
8835 Builder.CreateMemCpy(
8836 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
8838 KernelLaunchArgs.
push_back(NewArgStructAlloca);
8841 Builder.CreateRetVoid();
8847 return GEP->getSourceElementType();
8849 return Alloca->getAllocatedType();
8872 if (OffloadingArraysToPrivatize.
empty())
8873 return OMPIRBuilder.Task;
8876 for (
Value *V : OffloadingArraysToPrivatize) {
8877 assert(V->getType()->isPointerTy() &&
8878 "Expected pointer to array to privatize. Got a non-pointer value "
8881 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8887 "struct.task_with_privates");
8901 EntryFnName, Inputs, CBFunc,
8906 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
9043 TargetTaskAllocaBB->
begin());
9047 OI.
EntryBB = TargetTaskAllocaBB;
9053 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
9056 Builder.restoreIP(TargetTaskBodyIP);
9057 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
9075 bool NeedsTargetTask = HasNoWait && DeviceID;
9076 if (NeedsTargetTask) {
9082 OffloadingArraysToPrivatize.
push_back(V);
9087 OI.
PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
9088 DeviceID, OffloadingArraysToPrivatize](
9091 "there must be a single user for the outlined function");
9105 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
9106 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
9108 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
9109 "Wrong number of arguments for StaleCI when shareds are present");
9110 int SharedArgOperandNo =
9111 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
9117 if (!OffloadingArraysToPrivatize.
empty())
9122 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
9123 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
9125 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9128 Builder.SetInsertPoint(StaleCI);
9145 OMPRTL___kmpc_omp_target_task_alloc);
9157 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9164 auto *ArgStructAlloca =
9166 assert(ArgStructAlloca &&
9167 "Unable to find the alloca instruction corresponding to arguments "
9168 "for extracted function");
9169 std::optional<TypeSize> ArgAllocSize =
9172 "Unable to determine size of arguments for extracted function");
9173 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9192 TaskSize, SharedsSize,
9195 if (NeedsTargetTask) {
9196 assert(DeviceID &&
"Expected non-empty device ID.");
9206 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9207 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9210 if (!OffloadingArraysToPrivatize.
empty()) {
9212 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9213 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9214 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9221 "ElementType should match ArrayType");
9224 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9226 Dst, Alignment, PtrToPrivatize, Alignment,
9227 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9231 Value *DepArray =
nullptr;
9232 Value *NumDeps =
nullptr;
9235 NumDeps = Dependencies.
NumDeps;
9236 }
else if (!Dependencies.
Deps.empty()) {
9238 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
9249 if (!NeedsTargetTask) {
9258 ConstantInt::get(
Builder.getInt32Ty(), 0),
9271 }
else if (DepArray) {
9279 {Ident, ThreadID, TaskData, NumDeps, DepArray,
9280 ConstantInt::get(
Builder.getInt32Ty(), 0),
9290 I->eraseFromParent();
9295 << *(
Builder.GetInsertBlock()) <<
"\n");
9297 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9309 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9326 bool HasNoWait,
Value *DynCGroupMem,
9333 Builder.restoreIP(IP);
9339 return Builder.saveIP();
9342 bool HasDependencies = !Dependencies.
empty();
9343 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9360 if (OutlinedFnID && DeviceID)
9362 EmitTargetCallFallbackCB, KArgs,
9363 DeviceID, RTLoc, TargetTaskAllocaIP);
9371 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9378 auto &&EmitTargetCallElse =
9384 if (RequiresOuterTargetTask) {
9391 Dependencies, EmptyRTArgs, HasNoWait);
9393 return EmitTargetCallFallbackCB(Builder.saveIP());
9396 Builder.restoreIP(AfterIP);
9400 auto &&EmitTargetCallThen =
9403 Info.HasNoWait = HasNoWait;
9408 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9414 for (
auto [DefaultVal, RuntimeVal] :
9416 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9417 : Builder.getInt32(DefaultVal));
9421 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9423 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9427 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9430 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9438 Value *MaxThreadsClause =
9440 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9443 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9445 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9446 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9448 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9449 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9451 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9454 unsigned NumTargetItems = Info.NumberOfPtrs;
9462 Builder.getInt64Ty(),
9464 : Builder.getInt64(0);
9468 DynCGroupMem = Builder.getInt32(0);
9471 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9472 HasNoWait, DynCGroupMemFallback);
9479 if (RequiresOuterTargetTask)
9481 RTLoc, AllocaIP, Dependencies,
9482 KArgs.
RTArgs, Info.HasNoWait);
9485 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9486 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9489 Builder.restoreIP(AfterIP);
9496 if (!OutlinedFnID) {
9497 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
9503 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
9508 EmitTargetCallElse, AllocaIP));
9521 bool HasNowait,
Value *DynCGroupMem,
9535 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9536 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9542 if (!
Config.isTargetDevice())
9544 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
9545 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
9546 DynCGroupMemFallback);
9560 return OS.
str().str();
9565 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9571 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9573 assert(Elem.second->getValueType() == Ty &&
9574 "OMP internal variable has different type than requested");
9587 :
M.getTargetTriple().isAMDGPU()
9589 :
DL.getDefaultGlobalsAddressSpace();
9598 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9599 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9606Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9607 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9608 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9619 return SizePtrToInt;
9624 std::string VarName) {
9632 return MaptypesArrayGlobal;
9637 unsigned NumOperands,
9646 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9650 ArrI64Ty,
nullptr,
".offload_sizes");
9661 int64_t DeviceID,
unsigned NumOperands) {
9667 Value *ArgsBaseGEP =
9669 {Builder.getInt32(0), Builder.getInt32(0)});
9672 {Builder.getInt32(0), Builder.getInt32(0)});
9673 Value *ArgSizesGEP =
9675 {Builder.getInt32(0), Builder.getInt32(0)});
9679 Builder.getInt32(NumOperands),
9680 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9681 MaptypesArg, MapnamesArg, NullPtr});
9688 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
9689 "expected region end call to runtime only when end call is separate");
9691 auto VoidPtrTy = UnqualPtrTy;
9692 auto VoidPtrPtrTy = UnqualPtrTy;
9694 auto Int64PtrTy = UnqualPtrTy;
9696 if (!Info.NumberOfPtrs) {
9708 Info.RTArgs.BasePointersArray,
9711 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
9715 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
9719 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
9720 : Info.RTArgs.MapTypesArray,
9726 if (!Info.EmitDebug)
9730 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
9735 if (!Info.HasMapper)
9739 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
9760 "struct.descriptor_dim");
9762 enum { OffsetFD = 0, CountFD, StrideFD };
9766 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
9769 if (NonContigInfo.
Dims[
I] == 1)
9774 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9776 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
9777 unsigned RevIdx = EE -
II - 1;
9781 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9783 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
9784 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9786 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9788 NonContigInfo.
Counts[L][RevIdx], CountLVal,
9789 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9791 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9793 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
9794 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9798 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
9799 DimsAddr,
Builder.getPtrTy());
9802 Info.RTArgs.PointersArray, 0,
I);
9804 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
9809void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
9813 StringRef Prefix = IsInit ?
".init" :
".del";
9819 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
9820 Value *DeleteBit = Builder.CreateAnd(
9823 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9824 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9829 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
9830 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
9831 DeleteCond = Builder.CreateIsNull(
9836 DeleteCond =
Builder.CreateIsNotNull(
9852 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9853 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9854 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9855 MapTypeArg =
Builder.CreateOr(
9858 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9859 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9863 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
9864 ArraySize, MapTypeArg, MapName};
9890 MapperFn->
addFnAttr(Attribute::NoInline);
9891 MapperFn->
addFnAttr(Attribute::NoUnwind);
9901 auto SavedIP =
Builder.saveIP();
9902 Builder.SetInsertPoint(EntryBB);
9914 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
9916 Value *PtrBegin = BeginIn;
9922 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9923 MapType, MapName, ElementSize, HeadBB,
9934 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9935 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9941 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9942 PtrPHI->addIncoming(PtrBegin, HeadBB);
9947 return Info.takeError();
9951 Value *OffloadingArgs[] = {MapperHandle};
9955 Value *ShiftedPreviousSize =
9959 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
9960 Value *CurBaseArg = Info->BasePointers[
I];
9961 Value *CurBeginArg = Info->Pointers[
I];
9962 Value *CurSizeArg = Info->Sizes[
I];
9963 Value *CurNameArg = Info->Names.size()
9969 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9971 Value *MemberMapType =
9972 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9989 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9990 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9991 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10001 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10007 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10008 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10009 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10015 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10016 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10017 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10023 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10024 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10030 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10031 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10032 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10038 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10039 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10048 CurMapType->
addIncoming(MemberMapType, ToElseBB);
10050 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
10051 CurSizeArg, CurMapType, CurNameArg};
10053 auto ChildMapperFn = CustomMapperCB(
I);
10054 if (!ChildMapperFn)
10055 return ChildMapperFn.takeError();
10056 if (*ChildMapperFn) {
10071 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
10072 "omp.arraymap.next");
10073 PtrPHI->addIncoming(PtrNext, LastBB);
10074 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
10076 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10081 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10082 MapType, MapName, ElementSize, DoneBB,
10096 bool IsNonContiguous,
10100 Info.clearArrayInfo();
10103 if (Info.NumberOfPtrs == 0)
10112 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
10113 PointerArrayType,
nullptr,
".offload_baseptrs");
10115 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
10116 PointerArrayType,
nullptr,
".offload_ptrs");
10118 PointerArrayType,
nullptr,
".offload_mappers");
10119 Info.RTArgs.MappersArray = MappersArray;
10126 ConstantInt::get(Int64Ty, 0));
10128 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
10129 bool IsNonContigEntry =
10131 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10133 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10136 if (IsNonContigEntry) {
10138 "Index must be in-bounds for NON_CONTIG Dims array");
10140 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10141 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10146 ConstSizes[
I] = CI;
10150 RuntimeSizes.
set(
I);
10153 if (RuntimeSizes.
all()) {
10155 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10156 SizeArrayType,
nullptr,
".offload_sizes");
10162 auto *SizesArrayGbl =
10167 if (!RuntimeSizes.
any()) {
10168 Info.RTArgs.SizesArray = SizesArrayGbl;
10170 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10171 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10174 SizeArrayType,
nullptr,
".offload_sizes");
10178 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10179 SizesArrayGbl, OffloadSizeAlign,
10184 Info.RTArgs.SizesArray = Buffer;
10192 for (
auto mapFlag : CombinedInfo.
Types)
10194 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10198 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10204 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10205 Info.EmitDebug =
true;
10207 Info.RTArgs.MapNamesArray =
10209 Info.EmitDebug =
false;
10214 if (Info.separateBeginEndCalls()) {
10215 bool EndMapTypesDiffer =
false;
10217 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10218 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10219 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10220 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10221 EndMapTypesDiffer =
true;
10224 if (EndMapTypesDiffer) {
10226 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10231 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10234 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10236 Builder.CreateAlignedStore(BPVal, BP,
10237 M.getDataLayout().getPrefTypeAlign(PtrTy));
10239 if (Info.requiresDevicePointerInfo()) {
10241 CodeGenIP =
Builder.saveIP();
10243 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10244 Builder.restoreIP(CodeGenIP);
10246 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10248 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10250 DeviceAddrCB(
I, BP);
10256 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10259 Builder.CreateAlignedStore(PVal,
P,
10260 M.getDataLayout().getPrefTypeAlign(PtrTy));
10262 if (RuntimeSizes.
test(
I)) {
10264 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10270 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10273 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10276 auto CustomMFunc = CustomMapperCB(
I);
10278 return CustomMFunc.takeError();
10280 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10283 PointerArrayType, MappersArray,
10286 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10290 Info.NumberOfPtrs == 0)
10307 Builder.ClearInsertionPoint();
10337 auto CondConstant = CI->getSExtValue();
10339 return ThenGen(AllocaIP,
Builder.saveIP());
10341 return ElseGen(AllocaIP,
Builder.saveIP());
10351 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10369bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10373 "Unexpected Atomic Ordering.");
10375 bool Flush =
false;
10437 assert(
X.Var->getType()->isPointerTy() &&
10438 "OMP Atomic expects a pointer to target memory");
10439 Type *XElemTy =
X.ElemTy;
10442 "OMP atomic read expected a scalar type");
10444 Value *XRead =
nullptr;
10448 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10457 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10460 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10462 XRead = AtomicLoadRes.first;
10469 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10472 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10474 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10477 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10478 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10489 assert(
X.Var->getType()->isPointerTy() &&
10490 "OMP Atomic expects a pointer to target memory");
10491 Type *XElemTy =
X.ElemTy;
10494 "OMP atomic write expected a scalar type");
10502 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10505 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10513 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10518 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10525 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10526 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10532 Type *XTy =
X.Var->getType();
10534 "OMP Atomic expects a pointer to target memory");
10535 Type *XElemTy =
X.ElemTy;
10538 "OMP atomic update expected a scalar or struct type");
10541 "OpenMP atomic does not support LT or GT operations");
10545 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10546 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10548 return AtomicResult.takeError();
10549 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10554Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10558 return Builder.CreateAdd(Src1, Src2);
10560 return Builder.CreateSub(Src1, Src2);
10562 return Builder.CreateAnd(Src1, Src2);
10564 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10566 return Builder.CreateOr(Src1, Src2);
10568 return Builder.CreateXor(Src1, Src2);
10592Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10595 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10596 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10598 bool emitRMWOp =
false;
10606 emitRMWOp = XElemTy;
10609 emitRMWOp = (IsXBinopExpr && XElemTy);
10616 std::pair<Value *, Value *> Res;
10618 AtomicRMWInst *RMWInst =
10619 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10620 if (
T.isAMDGPU()) {
10621 if (IsIgnoreDenormalMode)
10622 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10624 if (!IsFineGrainedMemory)
10625 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10627 if (!IsRemoteMemory)
10631 Res.first = RMWInst;
10636 Res.second = Res.first;
10638 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10641 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10646 OpenMPIRBuilder::AtomicInfo atomicInfo(
10648 OldVal->
getAlign(),
true , AllocaIP,
X);
10649 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10652 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10659 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10660 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10661 Builder.SetInsertPoint(ContBB);
10663 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10665 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10668 Value *Upd = *CBResult;
10669 Builder.CreateStore(Upd, NewAtomicAddr);
10672 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10673 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10674 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
10675 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
10678 Res.first = OldExprVal;
10681 if (UnreachableInst *ExitTI =
10684 Builder.SetInsertPoint(ExitBB);
10686 Builder.SetInsertPoint(ExitTI);
10689 IntegerType *IntCastTy =
10692 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
10701 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10708 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10709 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10710 Builder.SetInsertPoint(ContBB);
10712 PHI->addIncoming(OldVal, CurBB);
10717 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
10718 X->getName() +
".atomic.fltCast");
10720 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
10721 X->getName() +
".atomic.ptrCast");
10725 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10728 Value *Upd = *CBResult;
10729 Builder.CreateStore(Upd, NewAtomicAddr);
10730 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
10734 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
10735 Result->setVolatile(VolatileX);
10736 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
10737 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10738 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
10739 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
10741 Res.first = OldExprVal;
10745 if (UnreachableInst *ExitTI =
10748 Builder.SetInsertPoint(ExitBB);
10750 Builder.SetInsertPoint(ExitTI);
10761 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
10762 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10767 Type *XTy =
X.Var->getType();
10769 "OMP Atomic expects a pointer to target memory");
10770 Type *XElemTy =
X.ElemTy;
10773 "OMP atomic capture expected a scalar or struct type");
10775 "OpenMP atomic does not support LT or GT operations");
10782 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
10783 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10786 Value *CapturedVal =
10787 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
10788 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
10790 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
10802 IsPostfixUpdate, IsFailOnly, Failure);
10814 assert(
X.Var->getType()->isPointerTy() &&
10815 "OMP atomic expects a pointer to target memory");
10818 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
10819 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
10822 bool IsInteger = E->getType()->isIntegerTy();
10824 if (
Op == OMPAtomicCompareOp::EQ) {
10839 Value *OldValue =
Builder.CreateExtractValue(Result, 0);
10841 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
10843 "OldValue and V must be of same type");
10844 if (IsPostfixUpdate) {
10845 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
10847 Value *SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
10860 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10862 CurBBTI,
X.Var->getName() +
".atomic.exit");
10868 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
10870 Builder.SetInsertPoint(ContBB);
10871 Builder.CreateStore(OldValue, V.Var);
10877 Builder.SetInsertPoint(ExitBB);
10879 Builder.SetInsertPoint(ExitTI);
10882 Value *CapturedValue =
10883 Builder.CreateSelect(SuccessOrFail, E, OldValue);
10884 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10890 assert(R.Var->getType()->isPointerTy() &&
10891 "r.var must be of pointer type");
10892 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10894 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
10895 Value *ResultCast = R.IsSigned
10896 ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
10897 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
10898 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
10901 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10902 "Op should be either max or min at this point");
10903 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
10914 if (IsXBinopExpr) {
10943 Value *CapturedValue =
nullptr;
10944 if (IsPostfixUpdate) {
10945 CapturedValue = OldValue;
10970 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
10971 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
10973 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10977 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10997 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
11024 bool SubClausesPresent =
11025 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
11027 if (!
Config.isTargetDevice() && SubClausesPresent) {
11028 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
11029 "if lowerbound is non-null, then upperbound must also be non-null "
11030 "for bounds on num_teams");
11032 if (NumTeamsUpper ==
nullptr)
11033 NumTeamsUpper =
Builder.getInt32(0);
11035 if (NumTeamsLower ==
nullptr)
11036 NumTeamsLower = NumTeamsUpper;
11040 "argument to if clause must be an integer value");
11044 IfExpr =
Builder.CreateICmpNE(IfExpr,
11045 ConstantInt::get(IfExpr->
getType(), 0));
11046 NumTeamsUpper =
Builder.CreateSelect(
11047 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
11050 NumTeamsLower =
Builder.CreateSelect(
11051 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
11054 if (ThreadLimit ==
nullptr)
11055 ThreadLimit =
Builder.getInt32(0);
11059 Value *NumTeamsLowerInt32 =
11061 Value *NumTeamsUpperInt32 =
11063 Value *ThreadLimitInt32 =
11070 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
11071 ThreadLimitInt32});
11076 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
11088 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
11090 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
11092 auto HostPostOutlineCB = [
this, Ident,
11093 ToBeDeleted](
Function &OutlinedFn)
mutable {
11098 "there must be a single user for the outlined function");
11103 "Outlined function must have two or three arguments only");
11105 bool HasShared = OutlinedFn.
arg_size() == 3;
11113 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
11114 "outlined function.");
11115 Builder.SetInsertPoint(StaleCI);
11122 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
11126 I->eraseFromParent();
11129 if (!
Config.isTargetDevice())
11148 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11163 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
11168 if (
Config.isTargetDevice()) {
11183 std::string VarName) {
11192 return MapNamesArrayGlobal;
11197void OpenMPIRBuilder::initializeTypes(
Module &M) {
11201 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11202#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11203#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11204 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11205 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11206#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11207 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11208 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11209#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11210 T = StructType::getTypeByName(Ctx, StructName); \
11212 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11214 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11215#include "llvm/Frontend/OpenMP/OMPKinds.def"
11226 while (!Worklist.
empty()) {
11230 if (
BlockSet.insert(SuccBB).second)
11242 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11254 Fn->
addFnAttr(
"uniform-work-group-size");
11255 Fn->
addFnAttr(Attribute::MustProgress);
11273 auto &&GetMDInt = [
this](
unsigned V) {
11280 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11281 auto &&TargetRegionMetadataEmitter =
11282 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11297 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11298 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11299 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11300 GetMDInt(E.getOrder())};
11303 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11312 auto &&DeviceGlobalVarMetadataEmitter =
11313 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11323 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11324 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11328 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11335 DeviceGlobalVarMetadataEmitter);
11337 for (
const auto &E : OrderedEntries) {
11338 assert(E.first &&
"All ordered entries must exist!");
11339 if (
const auto *CE =
11342 if (!CE->getID() || !CE->getAddress()) {
11346 if (!
M.getNamedValue(FnName))
11354 }
else if (
const auto *CE =
dyn_cast<
11363 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11365 if (!CE->getAddress()) {
11370 if (CE->getVarSize() == 0)
11374 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11375 (!
Config.isTargetDevice() && CE->getAddress())) &&
11376 "Declaret target link address is set.");
11377 if (
Config.isTargetDevice())
11379 if (!CE->getAddress()) {
11386 if (!CE->getAddress()) {
11399 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11403 OMPTargetGlobalVarEntryIndirectVTable))
11412 Flags, CE->getLinkage(), CE->getVarName());
11415 Flags, CE->getLinkage());
11426 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11432 Config.getRequiresFlags());
11442 OS <<
"_" <<
Count;
11447 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11450 EntryInfo.
Line, NewCount);
11458 auto FileIDInfo = CallBack();
11462 FileID =
Status->getUniqueID().getFile();
11466 FileID =
hash_value(std::get<0>(FileIDInfo));
11470 std::get<1>(FileIDInfo));
11476 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11478 !(Remain & 1); Remain = Remain >> 1)
11496 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11498 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11505 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11511 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11512 Flags |= MemberOfFlag;
11518 bool IsDeclaration,
bool IsExternallyVisible,
11520 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11521 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
11522 std::function<
Constant *()> GlobalInitializer,
11533 Config.hasRequiresUnifiedSharedMemory())) {
11538 if (!IsExternallyVisible)
11540 OS <<
"_decl_tgt_ref_ptr";
11543 Value *Ptr =
M.getNamedValue(PtrName);
11552 if (!
Config.isTargetDevice()) {
11553 if (GlobalInitializer)
11554 GV->setInitializer(GlobalInitializer());
11560 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11561 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11562 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
11574 bool IsDeclaration,
bool IsExternallyVisible,
11576 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11577 std::vector<Triple> TargetTriple,
11578 std::function<
Constant *()> GlobalInitializer,
11582 (TargetTriple.empty() && !
Config.isTargetDevice()))
11593 !
Config.hasRequiresUnifiedSharedMemory()) {
11595 VarName = MangledName;
11598 if (!IsDeclaration)
11600 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
11603 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
11607 if (
Config.isTargetDevice() &&
11616 if (!
M.getNamedValue(RefName)) {
11620 GvAddrRef->setConstant(
true);
11622 GvAddrRef->setInitializer(Addr);
11623 GeneratedRefs.push_back(GvAddrRef);
11632 if (
Config.isTargetDevice()) {
11633 VarName = (Addr) ? Addr->
getName() :
"";
11637 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11638 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11639 LlvmPtrTy, GlobalInitializer, VariableLinkage);
11640 VarName = (Addr) ? Addr->
getName() :
"";
11642 VarSize =
M.getDataLayout().getPointerSize();
11661 auto &&GetMDInt = [MN](
unsigned Idx) {
11666 auto &&GetMDString = [MN](
unsigned Idx) {
11668 return V->getString();
11671 switch (GetMDInt(0)) {
11675 case OffloadEntriesInfoManager::OffloadEntryInfo::
11676 OffloadingEntryInfoTargetRegion: {
11686 case OffloadEntriesInfoManager::OffloadEntryInfo::
11687 OffloadingEntryInfoDeviceGlobalVar:
11700 if (HostFilePath.
empty())
11704 if (std::error_code Err = Buf.getError()) {
11706 "OpenMPIRBuilder: " +
11714 if (std::error_code Err =
M.getError()) {
11716 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
11730 "expected a valid insertion block for creating an iterator loop");
11740 Builder.getCurrentDebugLocation(),
"omp.it.cont");
11752 T->eraseFromParent();
11761 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
11763 "iterator bodygen must terminate the canonical body with an "
11764 "unconditional branch to the loop latch",
11788 for (
const auto &
ParamAttr : ParamAttrs) {
11831 return std::string(Out.
str());
11839 unsigned VecRegSize;
11841 ISADataTy ISAData[] = {
11860 for (
char Mask :
Masked) {
11861 for (
const ISADataTy &
Data : ISAData) {
11864 Out <<
"_ZGV" <<
Data.ISA << Mask;
11866 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
11880template <
typename T>
11883 StringRef MangledName,
bool OutputBecomesInput,
11887 Out << Prefix << ISA << LMask << VLEN;
11888 if (OutputBecomesInput)
11890 Out << ParSeq <<
'_' << MangledName;
11899 bool OutputBecomesInput,
11904 OutputBecomesInput, Fn);
11906 OutputBecomesInput, Fn);
11910 OutputBecomesInput, Fn);
11912 OutputBecomesInput, Fn);
11916 OutputBecomesInput, Fn);
11918 OutputBecomesInput, Fn);
11923 OutputBecomesInput, Fn);
11934 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
11935 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
11947 OutputBecomesInput, Fn);
11954 OutputBecomesInput, Fn);
11956 OutputBecomesInput, Fn);
11960 OutputBecomesInput, Fn);
11964 OutputBecomesInput, Fn);
11973 OutputBecomesInput, Fn);
11980 MangledName, OutputBecomesInput, Fn);
11982 MangledName, OutputBecomesInput, Fn);
11986 MangledName, OutputBecomesInput, Fn);
11990 MangledName, OutputBecomesInput, Fn);
12000 return OffloadEntriesTargetRegion.empty() &&
12001 OffloadEntriesDeviceGlobalVar.empty();
12004unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
12006 auto It = OffloadEntriesTargetRegionCount.find(
12007 getTargetRegionEntryCountKey(EntryInfo));
12008 if (It == OffloadEntriesTargetRegionCount.end())
12013void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
12015 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
12016 EntryInfo.
Count + 1;
12022 OffloadEntriesTargetRegion[EntryInfo] =
12025 ++OffloadingEntriesNum;
12031 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
12034 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12038 if (OMPBuilder->Config.isTargetDevice()) {
12043 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
12044 Entry.setAddress(Addr);
12046 Entry.setFlags(Flags);
12052 "Target region entry already registered!");
12054 OffloadEntriesTargetRegion[EntryInfo] = Entry;
12055 ++OffloadingEntriesNum;
12057 incrementTargetRegionEntryInfoCount(EntryInfo);
12064 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12066 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
12067 if (It == OffloadEntriesTargetRegion.end()) {
12071 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
12079 for (
const auto &It : OffloadEntriesTargetRegion) {
12080 Action(It.first, It.second);
12086 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
12087 ++OffloadingEntriesNum;
12093 if (OMPBuilder->Config.isTargetDevice()) {
12097 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12099 if (Entry.getVarSize() == 0) {
12100 Entry.setVarSize(VarSize);
12101 Entry.setLinkage(Linkage);
12105 Entry.setVarSize(VarSize);
12106 Entry.setLinkage(Linkage);
12107 Entry.setAddress(Addr);
12110 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12111 assert(Entry.isValid() && Entry.getFlags() == Flags &&
12112 "Entry not initialized!");
12113 if (Entry.getVarSize() == 0) {
12114 Entry.setVarSize(VarSize);
12115 Entry.setLinkage(Linkage);
12122 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
12123 Addr, VarSize, Flags, Linkage,
12126 OffloadEntriesDeviceGlobalVar.try_emplace(
12127 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
12128 ++OffloadingEntriesNum;
12135 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12136 Action(E.getKey(), E.getValue());
12143void CanonicalLoopInfo::collectControlBlocks(
12150 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12162void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12174void CanonicalLoopInfo::mapIndVar(
12184 for (
Use &U : OldIV->
uses()) {
12188 if (
User->getParent() == getCond())
12190 if (
User->getParent() == getLatch())
12196 Value *NewIV = Updater(OldIV);
12199 for (Use *U : ReplacableUses)
12220 "Preheader must terminate with unconditional branch");
12222 "Preheader must jump to header");
12226 "Header must terminate with unconditional branch");
12227 assert(Header->getSingleSuccessor() == Cond &&
12228 "Header must jump to exiting block");
12231 assert(Cond->getSinglePredecessor() == Header &&
12232 "Exiting block only reachable from header");
12235 "Exiting block must terminate with conditional branch");
12237 "Exiting block's first successor jump to the body");
12239 "Exiting block's second successor must exit the loop");
12243 "Body only reachable from exiting block");
12248 "Latch must terminate with unconditional branch");
12249 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12252 assert(Latch->getSinglePredecessor() !=
nullptr);
12257 "Exit block must terminate with unconditional branch");
12258 assert(Exit->getSingleSuccessor() == After &&
12259 "Exit block must jump to after block");
12263 "After block only reachable from exit block");
12267 assert(IndVar &&
"Canonical induction variable not found?");
12269 "Induction variable must be an integer");
12271 "Induction variable must be a PHI in the loop header");
12277 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12285 assert(TripCount &&
"Loop trip count not found?");
12287 "Trip count and induction variable must have the same type");
12291 "Exit condition must be a signed less-than comparison");
12293 "Exit condition must compare the induction variable");
12295 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static bool hasGridValue(const Triple &T)
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const OpenMPIRBuilder::DependenciesInfo &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
bool hasTerminator() const LLVM_READONLY
Returns whether the block has a terminator.
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if a there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetGenArgAccessorsCallbackTy
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
void addOutlineInfo(OutlineInfo &&OI)
Add a new region that will be outlined later.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP)> TargetBodyGenCallbackTy
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
SmallVector< OutlineInfo, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const DependenciesInfo &Dependencies={}, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitTaskDependency(IRBuilderBase &Builder, Value *Entry, const DependData &Dep)
Store one kmp_depend_info entry at the given Entry pointer.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI CallInst * createOMPAlignedAlloc(const LocationDescription &Loc, Value *Align, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_align_alloc.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, const DependenciesInfo &Dependencies={}, const AffinityData &Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const DependenciesInfo &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after the scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE=nullptr, const Loop *L=nullptr) const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows the use of arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
A struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
omp::RTLDependenceKindTy DepKind
A struct to pack static and dynamic dependency information for a task.
SmallVector< DependData > Deps
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
PostOutlineCBTy PostOutlineCB
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
SmallVector< Value *, 2 > ExcludeArgsFromAggregate
bool FixUpNonEntryAllocas
BasicBlock * OuterAllocaBB
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...