70#define DEBUG_TYPE "openmp-ir-builder"
77 cl::desc(
"Use optimistic attributes describing "
78 "'as-if' properties of runtime calls."),
82 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
83 cl::desc(
"Factor for the unroll threshold to account for code "
84 "simplifications still taking place"),
88 "openmp-ir-builder-use-default-max-threads",
cl::Hidden,
99 if (!IP1.isSet() || !IP2.isSet())
101 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
106 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
107 case OMPScheduleType::UnorderedStaticChunked:
108 case OMPScheduleType::UnorderedStatic:
109 case OMPScheduleType::UnorderedDynamicChunked:
110 case OMPScheduleType::UnorderedGuidedChunked:
111 case OMPScheduleType::UnorderedRuntime:
112 case OMPScheduleType::UnorderedAuto:
113 case OMPScheduleType::UnorderedTrapezoidal:
114 case OMPScheduleType::UnorderedGreedy:
115 case OMPScheduleType::UnorderedBalanced:
116 case OMPScheduleType::UnorderedGuidedIterativeChunked:
117 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
118 case OMPScheduleType::UnorderedSteal:
119 case OMPScheduleType::UnorderedStaticBalancedChunked:
120 case OMPScheduleType::UnorderedGuidedSimd:
121 case OMPScheduleType::UnorderedRuntimeSimd:
122 case OMPScheduleType::OrderedStaticChunked:
123 case OMPScheduleType::OrderedStatic:
124 case OMPScheduleType::OrderedDynamicChunked:
125 case OMPScheduleType::OrderedGuidedChunked:
126 case OMPScheduleType::OrderedRuntime:
127 case OMPScheduleType::OrderedAuto:
128 case OMPScheduleType::OrderdTrapezoidal:
129 case OMPScheduleType::NomergeUnorderedStaticChunked:
130 case OMPScheduleType::NomergeUnorderedStatic:
131 case OMPScheduleType::NomergeUnorderedDynamicChunked:
132 case OMPScheduleType::NomergeUnorderedGuidedChunked:
133 case OMPScheduleType::NomergeUnorderedRuntime:
134 case OMPScheduleType::NomergeUnorderedAuto:
135 case OMPScheduleType::NomergeUnorderedTrapezoidal:
136 case OMPScheduleType::NomergeUnorderedGreedy:
137 case OMPScheduleType::NomergeUnorderedBalanced:
138 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
139 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
140 case OMPScheduleType::NomergeUnorderedSteal:
141 case OMPScheduleType::NomergeOrderedStaticChunked:
142 case OMPScheduleType::NomergeOrderedStatic:
143 case OMPScheduleType::NomergeOrderedDynamicChunked:
144 case OMPScheduleType::NomergeOrderedGuidedChunked:
145 case OMPScheduleType::NomergeOrderedRuntime:
146 case OMPScheduleType::NomergeOrderedAuto:
147 case OMPScheduleType::NomergeOrderedTrapezoidal:
148 case OMPScheduleType::OrderedDistributeChunked:
149 case OMPScheduleType::OrderedDistribute:
157 SchedType & OMPScheduleType::MonotonicityMask;
158 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
170 Builder.restoreIP(IP);
178 return T.isAMDGPU() ||
T.isNVPTX() ||
T.isSPIRV();
184 Kernel->getFnAttribute(
"target-features").getValueAsString();
185 if (Features.
count(
"+wavefrontsize64"))
200 bool HasSimdModifier,
bool HasDistScheduleChunks) {
202 switch (ClauseKind) {
203 case OMP_SCHEDULE_Default:
204 case OMP_SCHEDULE_Static:
205 return HasChunks ? OMPScheduleType::BaseStaticChunked
206 : OMPScheduleType::BaseStatic;
207 case OMP_SCHEDULE_Dynamic:
208 return OMPScheduleType::BaseDynamicChunked;
209 case OMP_SCHEDULE_Guided:
210 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
211 : OMPScheduleType::BaseGuidedChunked;
212 case OMP_SCHEDULE_Auto:
214 case OMP_SCHEDULE_Runtime:
215 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
216 : OMPScheduleType::BaseRuntime;
217 case OMP_SCHEDULE_Distribute:
218 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
219 : OMPScheduleType::BaseDistribute;
227 bool HasOrderedClause) {
228 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
229 OMPScheduleType::None &&
230 "Must not have ordering nor monotonicity flags already set");
233 ? OMPScheduleType::ModifierOrdered
234 : OMPScheduleType::ModifierUnordered;
235 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
238 if (OrderingScheduleType ==
239 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
240 return OMPScheduleType::OrderedGuidedChunked;
241 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
242 OMPScheduleType::ModifierOrdered))
243 return OMPScheduleType::OrderedRuntime;
245 return OrderingScheduleType;
251 bool HasSimdModifier,
bool HasMonotonic,
252 bool HasNonmonotonic,
bool HasOrderedClause) {
253 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
254 OMPScheduleType::None &&
255 "Must not have monotonicity flags already set");
256 assert((!HasMonotonic || !HasNonmonotonic) &&
257 "Monotonic and Nonmonotonic are contradicting each other");
260 return ScheduleType | OMPScheduleType::ModifierMonotonic;
261 }
else if (HasNonmonotonic) {
262 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
272 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
273 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
279 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
287 bool HasSimdModifier,
bool HasMonotonicModifier,
288 bool HasNonmonotonicModifier,
bool HasOrderedClause,
289 bool HasDistScheduleChunks) {
291 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
295 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
296 HasNonmonotonicModifier, HasOrderedClause);
304static std::optional<omp::OMPTgtExecModeFlags>
309 if (
Call->getCalledFunction()->getName() ==
"__kmpc_target_init") {
310 TargetInitCall =
Call;
335 std::optional<omp::OMPTgtExecModeFlags> ExecMode =
347 if (
Instruction *Term = Source->getTerminatorOrNull()) {
356 NewBr->setDebugLoc(
DL);
361 assert(New->getFirstInsertionPt() == New->begin() &&
362 "Target BB must not have PHI nodes");
378 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
382 NewBr->setDebugLoc(
DL);
394 Builder.SetInsertPoint(Old);
398 Builder.SetCurrentDebugLocation(
DebugLoc);
408 New->replaceSuccessorsPhiUsesWith(Old, New);
417 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
419 Builder.SetInsertPoint(Builder.GetInsertBlock());
422 Builder.SetCurrentDebugLocation(
DebugLoc);
431 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
433 Builder.SetInsertPoint(Builder.GetInsertBlock());
436 Builder.SetCurrentDebugLocation(
DebugLoc);
453 const Twine &Name =
"",
bool AsPtr =
true,
454 bool Is64Bit =
false) {
455 Builder.restoreIP(OuterAllocaIP);
459 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
463 FakeVal = FakeValAddr;
465 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
470 Builder.restoreIP(InnerAllocaIP);
473 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
476 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
489enum OpenMPOffloadingRequiresDirFlags {
491 OMP_REQ_UNDEFINED = 0x000,
493 OMP_REQ_NONE = 0x001,
495 OMP_REQ_REVERSE_OFFLOAD = 0x002,
497 OMP_REQ_UNIFIED_ADDRESS = 0x004,
499 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
501 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
508 DominatorTree *DT =
nullptr,
bool AggregateArgs =
false,
509 BlockFrequencyInfo *BFI =
nullptr,
510 BranchProbabilityInfo *BPI =
nullptr,
511 AssumptionCache *AC =
nullptr,
bool AllowVarArgs =
false,
512 bool AllowAlloca =
false,
513 BasicBlock *AllocationBlock =
nullptr,
515 std::string Suffix =
"",
bool ArgsInZeroAddressSpace =
false)
516 : CodeExtractor(BBs, DT, AggregateArgs, BFI, BPI, AC, AllowVarArgs,
517 AllowAlloca, AllocationBlock, DeallocationBlocks, Suffix,
518 ArgsInZeroAddressSpace),
519 OMPBuilder(OMPBuilder) {}
521 virtual ~OMPCodeExtractor() =
default;
524 OpenMPIRBuilder &OMPBuilder;
527class DeviceSharedMemCodeExtractor :
public OMPCodeExtractor {
529 using OMPCodeExtractor::OMPCodeExtractor;
530 virtual ~DeviceSharedMemCodeExtractor() =
default;
534 allocateVar(IRBuilder<>::InsertPoint AllocaIP,
Type *VarType,
535 const Twine &Name = Twine(
""),
536 AddrSpaceCastInst **CastedAlloc =
nullptr)
override {
537 return OMPBuilder.createOMPAllocShared(AllocaIP, VarType, Name);
540 virtual Instruction *deallocateVar(IRBuilder<>::InsertPoint DeallocIP,
542 return OMPBuilder.createOMPFreeShared(DeallocIP, Var, VarType);
549 OpenMPIRBuilder &OMPBuilder;
551 DeviceSharedMemOutlineInfo(OpenMPIRBuilder &OMPBuilder)
552 : OMPBuilder(OMPBuilder) {}
553 virtual ~DeviceSharedMemOutlineInfo() =
default;
555 virtual std::unique_ptr<CodeExtractor>
557 bool ArgsInZeroAddressSpace,
558 Twine Suffix = Twine(
""))
override;
564 : RequiresFlags(OMP_REQ_UNDEFINED) {}
568 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
569 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
572 RequiresFlags(OMP_REQ_UNDEFINED) {
573 if (HasRequiresReverseOffload)
574 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
575 if (HasRequiresUnifiedAddress)
576 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
577 if (HasRequiresUnifiedSharedMemory)
578 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
579 if (HasRequiresDynamicAllocators)
580 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
584 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
588 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
592 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
596 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
601 :
static_cast<int64_t
>(OMP_REQ_NONE);
606 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
608 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
613 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
615 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
620 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
622 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
627 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
629 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
642 constexpr size_t MaxDim = 3;
647 Value *DynCGroupMemFallbackFlag =
649 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
652 StrictFlag =
Builder.CreateShl(StrictFlag, 6);
654 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
655 Flags =
Builder.CreateOr(Flags, StrictFlag);
661 Value *NumThreads3D =
692 auto FnAttrs = Attrs.getFnAttrs();
693 auto RetAttrs = Attrs.getRetAttrs();
695 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
700 bool Param =
true) ->
void {
701 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
702 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
703 if (HasSignExt || HasZeroExt) {
704 assert(AS.getNumAttributes() == 1 &&
705 "Currently not handling extension attr combined with others.");
707 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
710 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
717#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
718#include "llvm/Frontend/OpenMP/OMPKinds.def"
722#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
724 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
725 addAttrSet(RetAttrs, RetAttrSet, false); \
726 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
727 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
728 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
730#include "llvm/Frontend/OpenMP/OMPKinds.def"
744#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
746 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
748 Fn = M.getFunction(Str); \
750#include "llvm/Frontend/OpenMP/OMPKinds.def"
756#define OMP_RTL(Enum, Str, ...) \
758 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
760#include "llvm/Frontend/OpenMP/OMPKinds.def"
764 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
774 LLVMContext::MD_callback,
776 2, {-1, -1},
true)}));
789 assert(Fn &&
"Failed to create OpenMP runtime function");
800 Builder.SetInsertPoint(FiniBB);
812 FiniBB = OtherFiniBB;
814 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
822 auto EndIt = FiniBB->end();
823 if (FiniBB->size() >= 1)
824 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
829 FiniBB->replaceAllUsesWith(OtherFiniBB);
830 FiniBB->eraseFromParent();
831 FiniBB = OtherFiniBB;
838 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
861 for (
auto Inst =
Block->getReverseIterator()->begin();
862 Inst !=
Block->getReverseIterator()->end();) {
891 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
912 DeferredOutlines.
push_back(std::move(OI));
916 ParallelRegionBlockSet.
clear();
918 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
928 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
929 std::unique_ptr<CodeExtractor> Extractor =
930 OI->createCodeExtractor(Blocks, ArgsInZeroAddressSpace,
".omp_par");
934 <<
" Exit: " << OI->ExitBB->getName() <<
"\n");
935 assert(Extractor->isEligible() &&
936 "Expected OpenMP outlining to be possible!");
938 for (
auto *V : OI->ExcludeArgsFromAggregate)
939 Extractor->excludeArgFromAggregate(V);
942 Extractor->extractCodeRegion(CEAC, OI->Inputs, OI->Outputs);
946 if (TargetCpuAttr.isStringAttribute())
949 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
950 if (TargetFeaturesAttr.isStringAttribute())
951 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
954 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
956 "OpenMP outlined functions should not return a value!");
961 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
968 assert(OI->EntryBB->getUniquePredecessor() == &ArtificialEntry);
975 "Expected instructions to add in the outlined region entry");
977 End = ArtificialEntry.
rend();
982 if (
I.isTerminator()) {
984 if (
Instruction *TI = OI->EntryBB->getTerminatorOrNull())
985 TI->adoptDbgRecords(&ArtificialEntry,
I.getIterator(),
false);
989 I.moveBeforePreserving(*OI->EntryBB,
990 OI->EntryBB->getFirstInsertionPt());
993 OI->EntryBB->moveBefore(&ArtificialEntry);
1000 if (OI->PostOutlineCB)
1001 OI->PostOutlineCB(*OutlinedFn);
1003 if (OI->FixUpNonEntryAllocas)
1035 errs() <<
"Error of kind: " << Kind
1036 <<
" when emitting offload entries and metadata during "
1037 "OMPIRBuilder finalization \n";
1043 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
1044 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
1045 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
1046 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
1063 ConstantInt::get(I32Ty,
Value), Name);
1076 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
1080 if (UsedArray.
empty())
1087 GV->setSection(
"llvm.metadata");
1093 auto *Int8Ty =
Builder.getInt8Ty();
1096 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1104 unsigned Reserve2Flags) {
1106 LocFlags |= OMP_IDENT_FLAG_KMPC;
1113 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1114 ConstantInt::get(Int32, Reserve2Flags),
1115 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1117 size_t SrcLocStrArgIdx = 4;
1118 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1122 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1129 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1130 if (
GV.getInitializer() == Initializer)
1135 M, OpenMPIRBuilder::Ident,
1138 M.getDataLayout().getDefaultGlobalsAddressSpace());
1150 SrcLocStrSize = LocStr.
size();
1159 if (
GV.isConstant() &&
GV.hasInitializer() &&
1160 GV.getInitializer() == Initializer)
1163 SrcLocStr =
Builder.CreateGlobalString(
1164 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1172 unsigned Line,
unsigned Column,
1178 Buffer.
append(FunctionName);
1180 Buffer.
append(std::to_string(Line));
1182 Buffer.
append(std::to_string(Column));
1190 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1201 !DIL->getFilename().empty() ? DIL->getFilename() :
M.getName();
1206 DIL->getColumn(), SrcLocStrSize);
1212 Loc.IP.getBlock()->getParent());
1218 "omp_global_thread_num");
1223 bool ForceSimpleCall,
bool CheckCancelFlag) {
1233 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1236 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1239 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1242 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1245 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1258 bool UseCancelBarrier =
1263 ? OMPRTL___kmpc_cancel_barrier
1264 : OMPRTL___kmpc_barrier),
1267 if (UseCancelBarrier && CheckCancelFlag)
1277 omp::Directive CanceledDirective) {
1282 auto *UI =
Builder.CreateUnreachable();
1290 Builder.SetInsertPoint(ElseTI);
1291 auto ElseIP =
Builder.saveIP();
1299 Builder.SetInsertPoint(ThenTI);
1301 Value *CancelKind =
nullptr;
1302 switch (CanceledDirective) {
1303#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1304 case DirectiveEnum: \
1305 CancelKind = Builder.getInt32(Value); \
1307#include "llvm/Frontend/OpenMP/OMPKinds.def"
1324 Builder.SetInsertPoint(UI->getParent());
1325 UI->eraseFromParent();
1332 omp::Directive CanceledDirective) {
1337 auto *UI =
Builder.CreateUnreachable();
1340 Value *CancelKind =
nullptr;
1341 switch (CanceledDirective) {
1342#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1343 case DirectiveEnum: \
1344 CancelKind = Builder.getInt32(Value); \
1346#include "llvm/Frontend/OpenMP/OMPKinds.def"
1363 Builder.SetInsertPoint(UI->getParent());
1364 UI->eraseFromParent();
1377 auto *KernelArgsPtr =
1378 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1383 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1386 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1390 NumThreads, HostPtr, KernelArgsPtr};
1417 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1421 Value *Return =
nullptr;
1441 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1442 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1449 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1451 auto CurFn =
Builder.GetInsertBlock()->getParent();
1458 emitBlock(OffloadContBlock, CurFn,
true);
1463 Value *CancelFlag, omp::Directive CanceledDirective) {
1465 "Unexpected cancellation!");
1485 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1494 Builder.SetInsertPoint(CancellationBlock);
1495 Builder.CreateBr(*FiniBBOrErr);
1498 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1510 size_t NumArgs = OutlinedFn.
arg_size();
1511 assert((NumArgs == 2 || NumArgs == 3) &&
1512 "expected a 2-3 argument parallel outlined function");
1513 bool UseArgStruct = NumArgs == 3;
1518 {Builder.getInt16Ty(), Builder.getInt32Ty()},
1522 OutlinedFn.
getName() +
".wrapper", OMPIRBuilder->
M);
1524 WrapperFn->addParamAttr(0, Attribute::NoUndef);
1525 WrapperFn->addParamAttr(0, Attribute::ZExt);
1526 WrapperFn->addParamAttr(1, Attribute::NoUndef);
1530 Builder.SetInsertPoint(EntryBB);
1533 Value *AddrAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1535 AddrAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1536 AddrAlloca, Builder.getPtrTy(0),
1537 AddrAlloca->
getName() +
".ascast");
1539 Value *ZeroAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1541 ZeroAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1542 ZeroAlloca, Builder.getPtrTy(0),
1543 ZeroAlloca->
getName() +
".ascast");
1545 Value *ArgsAlloca =
nullptr;
1547 ArgsAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
1548 nullptr,
"global_args");
1549 ArgsAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1550 ArgsAlloca, Builder.getPtrTy(0),
1551 ArgsAlloca->
getName() +
".ascast");
1555 Builder.CreateStore(WrapperFn->getArg(1), AddrAlloca);
1556 Builder.CreateStore(Builder.getInt32(0), ZeroAlloca);
1560 llvm::omp::RuntimeFunction::OMPRTL___kmpc_get_shared_variables),
1568 Value *StructArg = Builder.CreateLoad(Builder.getPtrTy(), ArgsAlloca);
1569 StructArg = Builder.CreateInBoundsGEP(Builder.getPtrTy(), StructArg,
1570 {Builder.getInt64(0)});
1571 StructArg = Builder.CreateLoad(Builder.getPtrTy(), StructArg,
"structArg");
1572 Args.push_back(StructArg);
1576 Builder.CreateCall(&OutlinedFn, Args);
1577 Builder.CreateRetVoid();
1592 "Expected at least tid and bounded tid as arguments");
1593 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1601 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1604 assert(CI &&
"Expected call instruction to outlined function");
1605 CI->
getParent()->setName(
"omp_parallel");
1607 Builder.SetInsertPoint(CI);
1608 Type *PtrTy = OMPIRBuilder->VoidPtr;
1611 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1615 Value *Args = ArgsAlloca;
1619 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1620 Builder.restoreIP(CurrentIP);
1623 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1625 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1627 Builder.CreateStore(V, StoreAddress);
1631 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1632 : Builder.getInt32(1);
1633 Value *NumThreadsArg =
1634 NumThreads ? Builder.CreateZExtOrTrunc(NumThreads, OMPIRBuilder->Int32)
1635 : Builder.getInt32(-1);
1645 Value *Parallel60CallArgs[] = {
1650 Builder.getInt32(-1),
1654 Builder.getInt64(NumCapturedVars),
1655 Builder.getInt32(0)};
1663 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1666 Builder.SetInsertPoint(PrivTID);
1668 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1675 I->eraseFromParent();
1698 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1706 F->addMetadata(LLVMContext::MD_callback,
1715 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1718 "Expected at least tid and bounded tid as arguments");
1719 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1722 CI->
getParent()->setName(
"omp_parallel");
1723 Builder.SetInsertPoint(CI);
1726 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1730 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1732 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1739 auto PtrTy = OMPIRBuilder->VoidPtr;
1740 if (IfCondition && NumCapturedVars == 0) {
1748 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1751 Builder.SetInsertPoint(PrivTID);
1753 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1760 I->eraseFromParent();
1768 Value *NumThreads, omp::ProcBindKind ProcBind,
bool IsCancellable) {
1777 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1778 (ProcBind != OMP_PROC_BIND_default);
1785 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1789 if (NumThreads && !
Config.isTargetDevice()) {
1792 Builder.CreateIntCast(NumThreads, Int32,
false)};
1797 if (ProcBind != OMP_PROC_BIND_default) {
1801 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1823 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1826 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1829 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1833 PointerType ::get(
M.getContext(), 0),
1834 "zero.addr.ascast");
1858 if (IP.getBlock()->end() == IP.getPoint()) {
1864 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1865 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1866 "Unexpected insertion point for finalization call!");
1878 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1884 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1902 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1905 assert(BodyGenCB &&
"Expected body generation callback!");
1907 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP, PRegExitBB))
1910 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1914 bool UsesDeviceSharedMemory =
1916 std::unique_ptr<OutlineInfo> OI =
1917 UsesDeviceSharedMemory
1918 ? std::make_unique<DeviceSharedMemOutlineInfo>(*
this)
1919 : std::make_unique<OutlineInfo>();
1921 if (
Config.isTargetDevice()) {
1923 OI->PostOutlineCB = [=, ToBeDeletedVec =
1924 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1926 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1927 ThreadID, ToBeDeletedVec);
1931 OI->PostOutlineCB = [=, ToBeDeletedVec =
1932 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1934 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1938 OI->FixUpNonEntryAllocas =
true;
1939 OI->OuterAllocBB = OuterAllocaBlock;
1940 OI->EntryBB = PRegEntryBB;
1941 OI->ExitBB = PRegExitBB;
1942 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
1943 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
1947 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
1959 ".omp_par", ArgsInZeroAddressSpace);
1964 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1966 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1971 return GV->getValueType() == OpenMPIRBuilder::Ident;
1976 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1982 if (&V == TIDAddr || &V == ZeroAddr) {
1983 OI->ExcludeArgsFromAggregate.push_back(&V);
1988 for (
Use &U : V.uses())
1990 if (ParallelRegionBlockSet.
count(UserI->getParent()))
2000 if (!V.getType()->isPointerTy()) {
2004 Builder.restoreIP(OuterAllocIP);
2006 if (UsesDeviceSharedMemory) {
2009 V.getName() +
".reloaded");
2010 for (
BasicBlock *DeallocBlock : OuterDeallocBlocks)
2012 InsertPointTy(DeallocBlock, DeallocBlock->getFirstInsertionPt()),
2015 Ptr =
Builder.CreateAlloca(V.getType(),
nullptr,
2016 V.getName() +
".reloaded");
2021 Builder.SetInsertPoint(InsertBB,
2026 Builder.restoreIP(InnerAllocaIP);
2027 Inner =
Builder.CreateLoad(V.getType(), Ptr);
2030 Value *ReplacementValue =
nullptr;
2033 ReplacementValue = PrivTID;
2036 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
2044 assert(ReplacementValue &&
2045 "Expected copy/create callback to set replacement value!");
2046 if (ReplacementValue == &V)
2051 UPtr->set(ReplacementValue);
2076 for (
Value *Output : Outputs)
2080 "OpenMP outlining should not produce live-out values!");
2082 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
2084 for (
auto *BB : Blocks)
2085 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
2093 assert(FiniInfo.DK == OMPD_parallel &&
2094 "Unexpected finalization stack state!");
2105 Builder.CreateBr(*FiniBBOrErr);
2109 Term->eraseFromParent();
2115 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
2116 UI->eraseFromParent();
2179 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2181 Builder.CreateStore(DepValPtr, Addr);
2184 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2186 ConstantInt::get(SizeTy,
2191 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Flags));
2193 static_cast<unsigned int>(Dep.
DepKind)),
2206 if (Dependencies.
empty())
2226 Type *DependInfo = OMPBuilder.DependInfo;
2228 Value *DepArray =
nullptr;
2230 Builder.SetInsertPoint(
2234 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2236 Builder.restoreIP(OldIP);
2238 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2240 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2247Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2249 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2264 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2268 "omp_taskloop_dup",
M);
2271 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2272 DestTaskArg->
setName(
"dest_task");
2273 SrcTaskArg->
setName(
"src_task");
2274 LastprivateFlagArg->
setName(
"lastprivate_flag");
2276 IRBuilderBase::InsertPointGuard Guard(
Builder);
2280 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2281 Type *TaskWithPrivatesTy =
2284 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2286 PrivatesTy, TaskPrivates,
2291 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2292 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2294 DestTaskContextPtr->
setName(
"destPtr");
2295 SrcTaskContextPtr->
setName(
"srcPtr");
2300 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2301 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2302 if (!AfterIPOrError)
2304 Builder.restoreIP(*AfterIPOrError);
2314 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2316 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2318 Value *TaskContextStructPtrVal) {
2323 uint32_t SrcLocStrSize;
2339 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP, TaskloopExitBB))
2342 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2347 llvm::CanonicalLoopInfo *CLI = result.
get();
2348 auto OI = std::make_unique<OutlineInfo>();
2349 OI->EntryBB = TaskloopAllocaBB;
2350 OI->OuterAllocBB = AllocaIP.getBlock();
2351 OI->ExitBB = TaskloopExitBB;
2352 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2353 copy(DeallocBlocks, OI->OuterDeallocBBs.end());
2359 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2361 TaskloopAllocaIP,
"lb",
false,
true);
2363 TaskloopAllocaIP,
"ub",
false,
true);
2365 TaskloopAllocaIP,
"step",
false,
true);
2368 OI->Inputs.insert(FakeLB);
2369 OI->Inputs.insert(FakeUB);
2370 OI->Inputs.insert(FakeStep);
2371 if (TaskContextStructPtrVal)
2372 OI->Inputs.insert(TaskContextStructPtrVal);
2373 assert(((TaskContextStructPtrVal && DupCB) ||
2374 (!TaskContextStructPtrVal && !DupCB)) &&
2375 "Task context struct ptr and duplication callback must be both set "
2381 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2385 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2386 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2389 if (!TaskDupFnOrErr) {
2392 Value *TaskDupFn = *TaskDupFnOrErr;
2394 OI->PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2395 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2396 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2397 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2398 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2400 assert(OutlinedFn.hasOneUse() &&
2401 "there must be a single user for the outlined function");
2408 Value *CastedLBVal =
2409 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2410 Value *CastedUBVal =
2411 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2412 Value *CastedStepVal =
2413 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2415 Builder.SetInsertPoint(StaleCI);
2428 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2449 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2451 AllocaInst *ArgStructAlloca =
2453 assert(ArgStructAlloca &&
2454 "Unable to find the alloca instruction corresponding to arguments "
2455 "for extracted function");
2456 std::optional<TypeSize> ArgAllocSize =
2459 "Unable to determine size of arguments for extracted function");
2460 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2465 CallInst *TaskData =
Builder.CreateCall(
2466 TaskAllocFn, {Ident, ThreadID,
Flags,
2467 TaskSize, SharedsSize,
2472 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2473 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2478 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2481 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2484 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2490 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2496 Value *GrainSizeVal =
2497 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2499 Value *TaskDup = TaskDupFn;
2501 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2502 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2507 Builder.CreateCall(TaskloopFn, Args);
2514 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2519 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2521 LoadInst *SharedsOutlined =
2522 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2523 OutlinedFn.getArg(1)->replaceUsesWithIf(
2525 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2528 Type *IVTy =
IV->getType();
2534 Value *TaskLB =
nullptr;
2535 Value *TaskUB =
nullptr;
2536 Value *TaskStep =
nullptr;
2537 Value *LoadTaskLB =
nullptr;
2538 Value *LoadTaskUB =
nullptr;
2539 Value *LoadTaskStep =
nullptr;
2540 for (Instruction &
I : *TaskloopAllocaBB) {
2541 if (
I.getOpcode() == Instruction::GetElementPtr) {
2544 switch (CI->getZExtValue()) {
2556 }
else if (
I.getOpcode() == Instruction::Load) {
2558 if (
Load.getPointerOperand() == TaskLB) {
2559 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2561 }
else if (
Load.getPointerOperand() == TaskUB) {
2562 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2564 }
else if (
Load.getPointerOperand() == TaskStep) {
2565 assert(TaskStep !=
nullptr &&
"Expected value for TaskStep");
2571 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2573 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2574 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2575 assert(LoadTaskStep !=
nullptr &&
"Expected value for LoadTaskStep");
2577 Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
2578 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2579 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2580 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2582 CLI->setTripCount(CastedTripCount);
2584 Builder.SetInsertPoint(CLI->getBody(),
2585 CLI->getBody()->getFirstInsertionPt());
2587 if (NumOfCollapseLoops > 1) {
2593 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2596 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2597 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2598 User *IVUser = IVUse->getUser();
2600 if (
Op->getOpcode() == Instruction::URem ||
2601 Op->getOpcode() == Instruction::UDiv) {
2606 for (User *User : UsersToReplace) {
2607 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2624 assert(CLI->getIndVar()->getNumUses() == 3 &&
2625 "Canonical loop should have exactly three uses of the ind var");
2626 for (User *IVUser : CLI->getIndVar()->users()) {
2628 if (
Mul->getOpcode() == Instruction::Mul) {
2629 for (User *MulUser :
Mul->users()) {
2631 if (
Add->getOpcode() == Instruction::Add) {
2632 Add->setOperand(1, CastedTaskLB);
2641 FakeLB->replaceAllUsesWith(CastedLBVal);
2642 FakeUB->replaceAllUsesWith(CastedUBVal);
2643 FakeStep->replaceAllUsesWith(CastedStepVal);
2645 I->eraseFromParent();
2650 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2656 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2666 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2698 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP, TaskExitBB))
2701 auto OI = std::make_unique<OutlineInfo>();
2702 OI->EntryBB = TaskAllocaBB;
2703 OI->OuterAllocBB = AllocaIP.
getBlock();
2704 OI->ExitBB = TaskExitBB;
2705 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2706 copy(DeallocBlocks, OI->OuterDeallocBBs.
end());
2711 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2713 OI->PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2714 Affinities, Mergeable, Priority, EventHandle,
2716 ToBeDeleted](
Function &OutlinedFn)
mutable {
2718 assert(OutlinedFn.hasOneUse() &&
2719 "there must be a single user for the outlined function");
2724 bool HasShareds = StaleCI->
arg_size() > 1;
2725 Builder.SetInsertPoint(StaleCI);
2750 bool UseMergedIf0Path = ConstIfCondition && ConstIfCondition->isZero();
2754 Flags =
Builder.CreateOr(FinalFlag, Flags);
2757 if (Mergeable || UseMergedIf0Path)
2769 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2778 assert(ArgStructAlloca &&
2779 "Unable to find the alloca instruction corresponding to arguments "
2780 "for extracted function");
2781 std::optional<TypeSize> ArgAllocSize =
2784 "Unable to determine size of arguments for extracted function");
2785 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2791 TaskAllocFn, {Ident, ThreadID, Flags,
2792 TaskSize, SharedsSize,
2795 if (Affinities.
Count && Affinities.
Info) {
2797 OMPRTL___kmpc_omp_reg_task_with_affinity);
2808 OMPRTL___kmpc_task_allow_completion_event);
2812 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2814 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2815 Builder.CreateStore(EventVal, EventHandleAddr);
2821 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2822 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2840 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2843 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2845 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2848 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2849 PriorityData, {Zero, Zero});
2850 Builder.CreateStore(Priority, CmplrData);
2853 Value *DepArray =
nullptr;
2854 Value *NumDeps =
nullptr;
2857 NumDeps = Dependencies.
NumDeps;
2858 }
else if (!Dependencies.
Deps.empty()) {
2860 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
2880 if (IfCondition && !UseMergedIf0Path) {
2885 Builder.GetInsertPoint()->getParent()->getTerminator();
2886 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2887 Builder.SetInsertPoint(IfTerminator);
2890 Builder.SetInsertPoint(ElseTI);
2897 {Ident, ThreadID, NumDeps, DepArray,
2898 ConstantInt::get(
Builder.getInt32Ty(), 0),
2913 Builder.SetInsertPoint(ThenTI);
2921 {Ident, ThreadID, TaskData, NumDeps, DepArray,
2922 ConstantInt::get(
Builder.getInt32Ty(), 0),
2933 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2935 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2936 OutlinedFn.getArg(1)->replaceUsesWithIf(
2937 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2941 I->eraseFromParent();
2945 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2967 if (
Error Err = BodyGenCB(AllocaIP,
Builder.saveIP(), DeallocBlocks))
2970 Builder.SetInsertPoint(TaskgroupExitBB);
3013 unsigned CaseNumber = 0;
3014 for (
auto SectionCB : SectionCBs) {
3016 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
3018 Builder.SetInsertPoint(CaseBB);
3022 {CaseEndBr->getParent(), CaseEndBr->getIterator()}, {}))
3033 Value *LB = ConstantInt::get(I32Ty, 0);
3034 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
3035 Value *ST = ConstantInt::get(I32Ty, 1);
3037 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
3042 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
3043 WorksharingLoopType::ForStaticLoop, !IsNowait);
3049 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
3053 assert(FiniInfo.DK == OMPD_sections &&
3054 "Unexpected finalization stack state!");
3055 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
3069 if (IP.getBlock()->end() != IP.getPoint())
3080 auto *CaseBB =
Loc.IP.getBlock();
3081 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
3082 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
3088 Directive OMPD = Directive::OMPD_sections;
3091 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
3102Value *OpenMPIRBuilder::getGPUThreadID() {
3105 OMPRTL___kmpc_get_hardware_thread_id_in_block),
3109Value *OpenMPIRBuilder::getGPUWarpSize() {
3114Value *OpenMPIRBuilder::getNVPTXWarpID() {
3115 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3116 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
3119Value *OpenMPIRBuilder::getNVPTXLaneID() {
3120 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3121 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
3122 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
3123 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
3130 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
3131 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
3132 assert(FromSize > 0 &&
"From size must be greater than zero");
3133 assert(ToSize > 0 &&
"To size must be greater than zero");
3134 if (FromType == ToType)
3136 if (FromSize == ToSize)
3137 return Builder.CreateBitCast(From, ToType);
3139 return Builder.CreateIntCast(From, ToType,
true);
3145 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3146 CastItem,
Builder.getPtrTy(0));
3147 Builder.CreateStore(From, ValCastItem);
3148 return Builder.CreateLoad(ToType, CastItem);
3155 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
3156 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
3160 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
3162 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
3164 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
3165 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
3166 Value *WarpSizeCast =
3168 Value *ShuffleCall =
3170 return castValueToType(AllocaIP, ShuffleCall, CastTy);
3177 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
3189 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3190 Value *ElemPtr = DstAddr;
3191 Value *Ptr = SrcAddr;
3192 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
3196 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3199 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
3200 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3204 if ((
Size / IntSize) > 1) {
3205 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3206 SrcAddrGEP,
Builder.getPtrTy());
3223 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
3225 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
3228 Value *Res = createRuntimeShuffleFunction(
3231 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
3233 Builder.CreateAlignedStore(Res, ElemPtr,
3234 M.getDataLayout().getPrefTypeAlign(ElemType));
3236 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3237 Value *LocalElemPtr =
3238 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3244 Value *Res = createRuntimeShuffleFunction(
3245 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3248 Res =
Builder.CreateTrunc(Res, ElemType);
3249 Builder.CreateStore(Res, ElemPtr);
3250 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3252 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3258Error OpenMPIRBuilder::emitReductionListCopy(
3263 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3264 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3268 for (
auto En :
enumerate(ReductionInfos)) {
3270 Value *SrcElementAddr =
nullptr;
3271 AllocaInst *DestAlloca =
nullptr;
3272 Value *DestElementAddr =
nullptr;
3273 Value *DestElementPtrAddr =
nullptr;
3275 bool ShuffleInElement =
false;
3278 bool UpdateDestListPtr =
false;
3282 ReductionArrayTy, SrcBase,
3283 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3284 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3288 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3289 ReductionArrayTy, DestBase,
3290 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3291 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
3297 Type *DestAllocaType =
3298 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3299 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3300 ".omp.reduction.element");
3302 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3303 DestElementAddr = DestAlloca;
3306 DestElementAddr->
getName() +
".ascast");
3308 ShuffleInElement =
true;
3309 UpdateDestListPtr =
true;
3321 if (ShuffleInElement) {
3322 Type *ShuffleType = RI.ElementType;
3323 Value *ShuffleSrcAddr = SrcElementAddr;
3324 Value *ShuffleDestAddr = DestElementAddr;
3325 AllocaInst *LocalStorage =
nullptr;
3328 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3329 assert(RI.ByRefAllocatedType &&
3330 "Expected by-ref allocated type to be set");
3335 ShuffleType = RI.ByRefElementType;
3337 if (RI.DataPtrPtrGen) {
3340 Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3343 return GenResult.takeError();
3352 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3354 ShuffleDestAddr = LocalStorage;
3359 ShuffleDestAddr = DestElementAddr;
3363 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3364 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3366 if (IsByRefElem && RI.DataPtrPtrGen) {
3368 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3369 DestAlloca,
Builder.getPtrTy(),
".ascast");
3372 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3373 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3376 return GenResult.takeError();
3379 switch (RI.EvaluationKind) {
3381 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3383 Builder.CreateStore(Elem, DestElementAddr);
3387 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3388 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3390 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3392 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3394 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3396 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3397 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3398 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3399 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3400 Builder.CreateStore(SrcReal, DestRealPtr);
3401 Builder.CreateStore(SrcImg, DestImgPtr);
3406 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3408 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3409 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3421 if (UpdateDestListPtr) {
3422 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3423 DestElementAddr,
Builder.getPtrTy(),
3424 DestElementAddr->
getName() +
".ascast");
3425 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3432Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3436 LLVMContext &Ctx =
M.getContext();
3438 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3442 "_omp_reduction_inter_warp_copy_func", &
M);
3448 Builder.SetInsertPoint(EntryBB);
3465 StringRef TransferMediumName =
3466 "__openmp_nvptx_data_transfer_temporary_storage";
3467 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3468 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3470 if (!TransferMedium) {
3471 TransferMedium =
new GlobalVariable(
3479 Value *GPUThreadID = getGPUThreadID();
3481 Value *LaneID = getNVPTXLaneID();
3483 Value *WarpID = getNVPTXWarpID();
3487 Builder.GetInsertBlock()->getFirstInsertionPt());
3491 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3492 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3493 AllocaInst *NumWarpsAlloca =
3494 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3495 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3496 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3497 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3498 NumWarpsAlloca,
Builder.getPtrTy(0),
3499 NumWarpsAlloca->
getName() +
".ascast");
3500 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3501 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3510 for (
auto En :
enumerate(ReductionInfos)) {
3516 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3517 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3518 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3519 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3522 unsigned NumIters = RealTySize / TySize;
3525 Value *Cnt =
nullptr;
3526 Value *CntAddr =
nullptr;
3533 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3535 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3536 CntAddr->
getName() +
".ascast");
3548 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3549 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3556 omp::Directive::OMPD_unknown,
3560 return BarrierIP1.takeError();
3566 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3567 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3571 auto *RedListArrayTy =
3574 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3576 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3577 {ConstantInt::get(IndexTy, 0),
3578 ConstantInt::get(IndexTy, En.index())});
3582 if (IsByRefElem && RI.DataPtrPtrGen) {
3584 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3587 return GenRes.takeError();
3598 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3603 Builder.CreateStore(Elem, MediumPtr,
3615 omp::Directive::OMPD_unknown,
3619 return BarrierIP2.takeError();
3626 Value *NumWarpsVal =
3629 Value *IsActiveThread =
3630 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3631 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3638 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3640 Value *TargetElemPtrPtr =
3641 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3642 {ConstantInt::get(IndexTy, 0),
3643 ConstantInt::get(IndexTy, En.index())});
3644 Value *TargetElemPtrVal =
3646 Value *TargetElemPtr = TargetElemPtrVal;
3648 if (IsByRefElem && RI.DataPtrPtrGen) {
3650 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3653 return GenRes.takeError();
3655 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
3663 Value *SrcMediumValue =
3664 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3665 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3675 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3676 Builder.CreateStore(Cnt, CntAddr,
false);
3678 auto *CurFn =
Builder.GetInsertBlock()->getParent();
3682 RealTySize %= TySize;
3692Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3695 LLVMContext &Ctx =
M.getContext();
3696 FunctionType *FuncTy =
3698 {Builder.getPtrTy(), Builder.getInt16Ty(),
3699 Builder.getInt16Ty(), Builder.getInt16Ty()},
3703 "_omp_reduction_shuffle_and_reduce_func", &
M);
3714 Builder.SetInsertPoint(EntryBB);
3725 Type *ReduceListArgType = ReduceListArg->
getType();
3729 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3730 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3731 LaneIDArg->
getName() +
".addr");
3733 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3734 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3735 AlgoVerArg->
getName() +
".addr");
3742 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3744 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3745 ReduceListAlloca, ReduceListArgType,
3746 ReduceListAlloca->
getName() +
".ascast");
3747 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3748 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3749 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3750 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3751 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3752 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3753 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3754 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3755 RemoteReductionListAlloca,
Builder.getPtrTy(),
3756 RemoteReductionListAlloca->
getName() +
".ascast");
3758 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3759 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3760 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3761 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3763 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3764 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3765 Value *RemoteLaneOffset =
3766 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3767 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3774 Error EmitRedLsCpRes = emitReductionListCopy(
3776 ReduceList, RemoteListAddrCast, IsByRef,
3777 {RemoteLaneOffset,
nullptr,
nullptr});
3780 return EmitRedLsCpRes;
3805 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3810 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3811 Value *RemoteOffsetComp =
3813 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3814 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3815 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3821 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3823 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3824 ReduceList,
Builder.getPtrTy());
3825 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3826 RemoteListAddrCast,
Builder.getPtrTy());
3828 ->addFnAttr(Attribute::NoUnwind);
3839 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3840 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3845 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3849 EmitRedLsCpRes = emitReductionListCopy(
3851 RemoteListAddrCast, ReduceList, IsByRef);
3854 return EmitRedLsCpRes;
3869OpenMPIRBuilder::generateReductionDescriptor(
3871 Type *DescriptorType,
3877 Value *DescriptorSize =
3878 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3880 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3881 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3885 Value *DataPtrField;
3887 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3890 return GenResult.takeError();
3893 DataPtr,
Builder.getPtrTy(),
".ascast"),
3899Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3903 LLVMContext &Ctx =
M.getContext();
3906 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3910 "_omp_reduction_list_to_global_copy_func", &
M);
3917 Builder.SetInsertPoint(EntryBlock);
3927 BufferArg->
getName() +
".addr");
3931 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3932 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3933 BufferArgAlloca,
Builder.getPtrTy(),
3934 BufferArgAlloca->
getName() +
".ascast");
3935 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3936 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3937 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3938 ReduceListArgAlloca,
Builder.getPtrTy(),
3939 ReduceListArgAlloca->
getName() +
".ascast");
3941 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3942 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3943 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3945 Value *LocalReduceList =
3947 Value *BufferArgVal =
3951 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3952 for (
auto En :
enumerate(ReductionInfos)) {
3954 auto *RedListArrayTy =
3958 RedListArrayTy, LocalReduceList,
3959 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3965 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3967 ReductionsBufferTy, BufferVD, 0, En.index());
3969 switch (RI.EvaluationKind) {
3971 Value *TargetElement;
3973 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3974 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
3976 if (RI.DataPtrPtrGen) {
3978 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3981 return GenResult.takeError();
3985 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3988 Builder.CreateStore(TargetElement, GlobVal);
3992 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3993 RI.ElementType, ElemPtr, 0, 0,
".realp");
3995 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3997 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3999 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
4001 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4002 RI.ElementType, GlobVal, 0, 0,
".realp");
4003 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4004 RI.ElementType, GlobVal, 0, 1,
".imagp");
4005 Builder.CreateStore(SrcReal, DestRealPtr);
4006 Builder.CreateStore(SrcImg, DestImgPtr);
4011 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
4013 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
4014 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
4025Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
4029 LLVMContext &Ctx =
M.getContext();
4032 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4036 "_omp_reduction_list_to_global_reduce_func", &
M);
4043 Builder.SetInsertPoint(EntryBlock);
4053 BufferArg->
getName() +
".addr");
4057 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4058 auto *RedListArrayTy =
4063 Value *LocalReduceList =
4064 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4068 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4069 BufferArgAlloca,
Builder.getPtrTy(),
4070 BufferArgAlloca->
getName() +
".ascast");
4071 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4072 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4073 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4074 ReduceListArgAlloca,
Builder.getPtrTy(),
4075 ReduceListArgAlloca->
getName() +
".ascast");
4076 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4077 LocalReduceList,
Builder.getPtrTy(),
4078 LocalReduceList->
getName() +
".ascast");
4080 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4081 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4082 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4087 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4088 for (
auto En :
enumerate(ReductionInfos)) {
4091 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4092 RedListArrayTy, LocalReduceListAddrCast,
4093 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4095 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4097 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4098 ReductionsBufferTy, BufferVD, 0, En.index());
4100 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4104 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4105 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4106 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4113 Value *SrcElementPtrPtr =
4114 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
4115 {ConstantInt::get(IndexTy, 0),
4116 ConstantInt::get(IndexTy, En.index())});
4117 Value *SrcDescriptorAddr =
4122 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4123 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4126 return GenResult.takeError();
4128 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4130 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4138 ->addFnAttr(Attribute::NoUnwind);
4144Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
4148 LLVMContext &Ctx =
M.getContext();
4151 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4155 "_omp_reduction_global_to_list_copy_func", &
M);
4162 Builder.SetInsertPoint(EntryBlock);
4172 BufferArg->
getName() +
".addr");
4176 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4177 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4178 BufferArgAlloca,
Builder.getPtrTy(),
4179 BufferArgAlloca->
getName() +
".ascast");
4180 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4181 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4182 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4183 ReduceListArgAlloca,
Builder.getPtrTy(),
4184 ReduceListArgAlloca->
getName() +
".ascast");
4185 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4186 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4187 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4189 Value *LocalReduceList =
4194 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4195 for (
auto En :
enumerate(ReductionInfos)) {
4196 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
4197 auto *RedListArrayTy =
4201 RedListArrayTy, LocalReduceList,
4202 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4207 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4208 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4209 ReductionsBufferTy, BufferVD, 0, En.index());
4215 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4222 return GenResult.takeError();
4228 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
4229 Builder.CreateStore(TargetElement, ElemPtr);
4233 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4242 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4244 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4246 Builder.CreateStore(SrcReal, DestRealPtr);
4247 Builder.CreateStore(SrcImg, DestImgPtr);
4254 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4255 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4267Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4271 LLVMContext &Ctx =
M.getContext();
4274 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4278 "_omp_reduction_global_to_list_reduce_func", &
M);
4285 Builder.SetInsertPoint(EntryBlock);
4295 BufferArg->
getName() +
".addr");
4299 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4305 Value *LocalReduceList =
4306 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4310 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4311 BufferArgAlloca,
Builder.getPtrTy(),
4312 BufferArgAlloca->
getName() +
".ascast");
4313 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4314 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4315 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4316 ReduceListArgAlloca,
Builder.getPtrTy(),
4317 ReduceListArgAlloca->
getName() +
".ascast");
4318 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4319 LocalReduceList,
Builder.getPtrTy(),
4320 LocalReduceList->
getName() +
".ascast");
4322 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4323 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4324 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4329 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4330 for (
auto En :
enumerate(ReductionInfos)) {
4333 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4334 RedListArrayTy, ReductionList,
4335 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4338 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4339 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4340 ReductionsBufferTy, BufferVD, 0, En.index());
4342 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4346 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4347 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4348 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4353 Value *ReduceListVal =
4355 Value *SrcElementPtrPtr =
4356 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4357 {ConstantInt::get(IndexTy, 0),
4358 ConstantInt::get(IndexTy, En.index())});
4359 Value *SrcDescriptorAddr =
4364 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4365 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4367 return GenResult.takeError();
4369 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4371 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4379 ->addFnAttr(Attribute::NoUnwind);
4385std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4386 std::string Suffix =
4388 return (Name + Suffix).str();
4391Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4394 AttributeList FuncAttrs) {
4396 {Builder.getPtrTy(), Builder.getPtrTy()},
4398 std::string
Name = getReductionFuncName(ReducerName);
4407 Builder.SetInsertPoint(EntryBB);
4411 Value *LHSArrayPtr =
nullptr;
4412 Value *RHSArrayPtr =
nullptr;
4419 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4421 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4422 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4423 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4424 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4425 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4426 Builder.CreateStore(Arg0, LHSAddrCast);
4427 Builder.CreateStore(Arg1, RHSAddrCast);
4428 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4429 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4433 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4435 for (
auto En :
enumerate(ReductionInfos)) {
4438 RedArrayTy, RHSArrayPtr,
4439 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4441 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4442 RHSI8Ptr, RI.PrivateVariable->getType(),
4443 RHSI8Ptr->
getName() +
".ascast");
4446 RedArrayTy, LHSArrayPtr,
4447 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4449 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4450 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4459 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4460 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4461 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4468 return AfterIP.takeError();
4469 if (!
Builder.GetInsertBlock())
4470 return ReductionFunc;
4474 if (!IsByRef.
empty() && !IsByRef[En.index()])
4475 Builder.CreateStore(Reduced, LHSPtr);
4480 for (
auto En :
enumerate(ReductionInfos)) {
4481 unsigned Index = En.index();
4483 Value *LHSFixupPtr, *RHSFixupPtr;
4484 Builder.restoreIP(RI.ReductionGenClang(
4485 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4490 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4495 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4509 return ReductionFunc;
4517 assert(RI.Variable &&
"expected non-null variable");
4518 assert(RI.PrivateVariable &&
"expected non-null private variable");
4519 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4520 "expected non-null reduction generator callback");
4523 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4524 "expected variables and their private equivalents to have the same "
4527 assert(RI.Variable->getType()->isPointerTy() &&
4528 "expected variables to be pointers");
4537 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4551 if (ReductionInfos.
size() == 0)
4561 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4565 AttributeList FuncAttrs;
4566 AttrBuilder AttrBldr(Ctx);
4568 AttrBldr.addAttribute(Attr);
4569 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4570 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4574 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4576 if (!ReductionResult)
4578 Function *ReductionFunc = *ReductionResult;
4582 if (GridValue.has_value())
4583 Config.setGridValue(GridValue.value());
4598 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4602 Value *ReductionListAlloca =
4603 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4604 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4605 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4608 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4609 for (
auto En :
enumerate(ReductionInfos)) {
4612 RedArrayTy, ReductionList,
4613 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4616 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4621 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4622 Builder.CreateStore(CastElem, ElemPtr);
4626 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4632 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4638 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4650 unsigned MaxDataSize = 0;
4652 for (
auto En :
enumerate(ReductionInfos)) {
4656 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4657 ? En.value().ByRefElementType
4658 : En.value().ElementType;
4659 auto Size =
M.getDataLayout().getTypeStoreSize(RedTypeArg);
4660 if (
Size > MaxDataSize)
4664 Value *ReductionDataSize =
4665 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4666 if (!IsTeamsReduction) {
4667 Value *SarFuncCast =
4668 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4670 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4671 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4674 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4679 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4681 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4684 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4689 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4694 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4699 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4706 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4708 Value *Args3[] = {SrcLocInfo,
4709 KernelTeamsReductionPtr,
4710 Builder.getInt32(ReductionBufNum),
4721 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4738 for (
auto En :
enumerate(ReductionInfos)) {
4746 Value *LHSPtr, *RHSPtr;
4748 &LHSPtr, &RHSPtr, CurFunc));
4754 RedValue =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4756 if (RHSPtr->
getType() != RHS->getType())
4758 Builder.CreatePointerBitCastOrAddrSpaceCast(RHS, RHSPtr->
getType());
4769 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4771 "red.value." +
Twine(En.index()));
4782 if (!IsByRef.
empty() && !IsByRef[En.index()])
4787 if (ContinuationBlock) {
4788 Builder.CreateBr(ContinuationBlock);
4789 Builder.SetInsertPoint(ContinuationBlock);
4791 Config.setEmitLLVMUsed();
4802 ".omp.reduction.func", &M);
4812 Builder.SetInsertPoint(ReductionFuncBlock);
4813 Value *LHSArrayPtr =
nullptr;
4814 Value *RHSArrayPtr =
nullptr;
4825 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4827 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4828 Value *LHSAddrCast =
4829 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4830 Value *RHSAddrCast =
4831 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4832 Builder.CreateStore(Arg0, LHSAddrCast);
4833 Builder.CreateStore(Arg1, RHSAddrCast);
4834 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4835 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4837 LHSArrayPtr = ReductionFunc->
getArg(0);
4838 RHSArrayPtr = ReductionFunc->
getArg(1);
4841 unsigned NumReductions = ReductionInfos.
size();
4844 for (
auto En :
enumerate(ReductionInfos)) {
4846 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4847 RedArrayTy, LHSArrayPtr, 0, En.index());
4848 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4849 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4852 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4853 RedArrayTy, RHSArrayPtr, 0, En.index());
4854 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4855 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4864 Builder.restoreIP(*AfterIP);
4866 if (!Builder.GetInsertBlock())
4870 if (!IsByRef[En.index()])
4871 Builder.CreateStore(Reduced, LHSPtr);
4873 Builder.CreateRetVoid();
4880 bool IsNoWait,
bool IsTeamsReduction) {
4884 IsByRef, IsNoWait, IsTeamsReduction);
4891 if (ReductionInfos.
size() == 0)
4901 unsigned NumReductions = ReductionInfos.
size();
4904 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4906 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4908 for (
auto En :
enumerate(ReductionInfos)) {
4909 unsigned Index = En.index();
4911 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4912 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4919 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4929 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4934 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4935 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4937 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4939 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4940 : RuntimeFunction::OMPRTL___kmpc_reduce);
4943 {Ident, ThreadId, NumVariables, RedArraySize,
4944 RedArray, ReductionFunc, Lock},
4955 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4956 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4957 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4962 Builder.SetInsertPoint(NonAtomicRedBlock);
4963 for (
auto En :
enumerate(ReductionInfos)) {
4969 if (!IsByRef[En.index()]) {
4971 "red.value." +
Twine(En.index()));
4973 Value *PrivateRedValue =
4975 "red.private.value." +
Twine(En.index()));
4983 if (!
Builder.GetInsertBlock())
4986 if (!IsByRef[En.index()])
4990 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4991 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4993 Builder.CreateBr(ContinuationBlock);
4998 Builder.SetInsertPoint(AtomicRedBlock);
4999 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
5006 if (!
Builder.GetInsertBlock())
5009 Builder.CreateBr(ContinuationBlock);
5022 if (!
Builder.GetInsertBlock())
5025 Builder.SetInsertPoint(ContinuationBlock);
5036 Directive OMPD = Directive::OMPD_master;
5041 Value *Args[] = {Ident, ThreadId};
5049 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5060 Directive OMPD = Directive::OMPD_masked;
5066 Value *ArgsEnd[] = {Ident, ThreadId};
5074 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5084 Call->setDoesNotThrow();
5099 bool IsInclusive,
ScanInfo *ScanRedInfo) {
5101 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
5102 ScanVarsType, ScanRedInfo);
5113 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5116 Type *DestTy = ScanVarsType[i];
5117 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5120 Builder.CreateStore(Src, Val);
5125 Builder.GetInsertBlock()->getParent());
5128 IV = ScanRedInfo->
IV;
5131 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5134 Type *DestTy = ScanVarsType[i];
5136 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5138 Builder.CreateStore(Src, ScanVars[i]);
5152 Builder.GetInsertBlock()->getParent());
5157Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
5161 Builder.restoreIP(AllocaIP);
5163 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5165 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
5172 Builder.restoreIP(CodeGenIP);
5174 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
5175 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5179 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
5180 AllocSpan,
nullptr,
"arr");
5181 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
5199 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5208Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
5214 Value *PrivateVar = RedInfo.PrivateVariable;
5215 Value *OrigVar = RedInfo.Variable;
5219 Type *SrcTy = RedInfo.ElementType;
5224 Builder.CreateStore(Src, OrigVar);
5247 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5272 Builder.GetInsertBlock()->getModule(),
5279 Builder.GetInsertBlock()->getModule(),
5285 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5286 Builder.SetInsertPoint(InputBB);
5289 Builder.SetInsertPoint(LoopBB);
5305 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5307 Builder.SetInsertPoint(InnerLoopBB);
5311 Value *ReductionVal = RedInfo.PrivateVariable;
5314 Type *DestTy = RedInfo.ElementType;
5317 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5320 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5325 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5328 Builder.CreateStore(Result, LHSPtr);
5331 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5333 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5334 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5337 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5343 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5364 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5371Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5383 Error Err = InputLoopGen();
5394 Error Err = ScanLoopGen(Builder.saveIP());
5401void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5438 Builder.SetInsertPoint(Preheader);
5441 Builder.SetInsertPoint(Header);
5442 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5443 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5448 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5449 Builder.CreateCondBr(Cmp, Body, Exit);
5454 Builder.SetInsertPoint(Latch);
5456 "omp_" + Name +
".next",
true);
5467 CL->Header = Header;
5486 NextBB, NextBB, Name);
5518 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5527 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5528 ScanRedInfo->
Span = TripCount;
5534 ScanRedInfo->
IV =
IV;
5535 createScanBBs(ScanRedInfo);
5538 assert(Terminator->getNumSuccessors() == 1);
5539 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5542 Builder.GetInsertBlock()->getParent());
5545 Builder.GetInsertBlock()->getParent());
5546 Builder.CreateBr(ContinueBlock);
5552 const auto &&InputLoopGen = [&]() ->
Error {
5554 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5555 ComputeIP, Name,
true, ScanRedInfo);
5559 Builder.restoreIP((*LoopInfo)->getAfterIP());
5565 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5569 Builder.restoreIP((*LoopInfo)->getAfterIP());
5573 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5581 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5591 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5592 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5596 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5612 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5615 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5619 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5624 Value *CountIfLooping;
5625 if (InclusiveStop) {
5626 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5632 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5635 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5636 "omp_" + Name +
".tripcount");
5641 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5648 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5655 ScanRedInfo->
IV = IndVar;
5656 return BodyGenCB(
Builder.saveIP(), IndVar);
5662 Builder.getCurrentDebugLocation());
5673 unsigned Bitwidth = Ty->getIntegerBitWidth();
5676 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5679 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5689 unsigned Bitwidth = Ty->getIntegerBitWidth();
5692 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5695 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5703 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5705 "Require dedicated allocate IP");
5711 uint32_t SrcLocStrSize;
5715 case WorksharingLoopType::ForStaticLoop:
5716 Flag = OMP_IDENT_FLAG_WORK_LOOP;
5718 case WorksharingLoopType::DistributeStaticLoop:
5719 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5721 case WorksharingLoopType::DistributeForStaticLoop:
5722 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
5729 Type *IVTy =
IV->getType();
5730 FunctionCallee StaticInit =
5731 LoopType == WorksharingLoopType::DistributeForStaticLoop
5734 FunctionCallee StaticFini =
5738 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5741 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5742 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5743 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5744 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5753 Constant *One = ConstantInt::get(IVTy, 1);
5754 Builder.CreateStore(Zero, PLowerBound);
5756 Builder.CreateStore(UpperBound, PUpperBound);
5757 Builder.CreateStore(One, PStride);
5763 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5764 ? OMPScheduleType::OrderedDistribute
5767 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5771 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5772 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5775 PLowerBound, PUpperBound});
5776 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5777 Value *PDistUpperBound =
5778 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5779 Args.push_back(PDistUpperBound);
5784 BuildInitCall(SchedulingType,
Builder);
5785 if (HasDistSchedule &&
5786 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5787 Constant *DistScheduleSchedType = ConstantInt::get(
5792 BuildInitCall(DistScheduleSchedType,
Builder);
5794 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5795 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5796 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5797 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5798 CLI->setTripCount(TripCount);
5804 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5808 return Builder.CreateAdd(OldIV, LowerBound);
5820 omp::Directive::OMPD_for,
false,
5823 return BarrierIP.takeError();
5850 Reachable.insert(
Block);
5860 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5864OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5868 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5869 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5874 Type *IVTy =
IV->getType();
5876 "Max supported tripcount bitwidth is 64 bits");
5878 :
Type::getInt64Ty(Ctx);
5881 Constant *One = ConstantInt::get(InternalIVTy, 1);
5887 for (BasicBlock &BB : *
F)
5888 if (!BB.hasTerminator())
5889 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5894 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5895 for (Instruction *
I : UIs)
5896 I->eraseFromParent();
5899 if (ChunkSize || DistScheduleChunkSize)
5904 FunctionCallee StaticInit =
5906 FunctionCallee StaticFini =
5912 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5913 Value *PLowerBound =
5914 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5915 Value *PUpperBound =
5916 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5917 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5926 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5927 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5928 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5929 "distschedulechunksize");
5930 Value *CastedTripCount =
5931 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5934 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5936 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5937 Builder.CreateStore(Zero, PLowerBound);
5938 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5939 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5941 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5942 Builder.CreateStore(UpperBound, PUpperBound);
5943 Builder.CreateStore(One, PStride);
5947 uint32_t SrcLocStrSize;
5950 if (DistScheduleSchedType != OMPScheduleType::None) {
5951 Flag |= OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5956 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5957 PUpperBound, PStride, One,
5958 this](
Value *SchedulingType,
Value *ChunkSize,
5961 StaticInit, {SrcLoc, ThreadNum,
5962 SchedulingType, PLastIter,
5963 PLowerBound, PUpperBound,
5967 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5968 if (DistScheduleSchedType != OMPScheduleType::None &&
5969 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5970 SchedType != OMPScheduleType::OrderedDistribute) {
5974 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5978 Value *FirstChunkStart =
5979 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5980 Value *FirstChunkStop =
5981 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5982 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5984 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5985 Value *NextChunkStride =
5986 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5990 Value *DispatchCounter;
5998 DispatchCounter = Counter;
6001 FirstChunkStart, CastedTripCount, NextChunkStride,
6024 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
6025 Value *IsLastChunk =
6026 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
6027 Value *CountUntilOrigTripCount =
6028 Builder.CreateSub(CastedTripCount, DispatchCounter);
6030 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
6031 Value *BackcastedChunkTC =
6032 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
6033 CLI->setTripCount(BackcastedChunkTC);
6038 Value *BackcastedDispatchCounter =
6039 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
6040 CLI->mapIndVar([&](Instruction *) ->
Value * {
6042 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
6055 return AfterIP.takeError();
6070static FunctionCallee
6073 unsigned Bitwidth = Ty->getIntegerBitWidth();
6076 case WorksharingLoopType::ForStaticLoop:
6079 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
6082 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
6084 case WorksharingLoopType::DistributeStaticLoop:
6087 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
6090 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
6092 case WorksharingLoopType::DistributeForStaticLoop:
6095 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
6098 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
6101 if (Bitwidth != 32 && Bitwidth != 64) {
6113 Function &LoopBodyFn,
bool NoLoop) {
6124 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
6125 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6126 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6127 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6132 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
6133 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6137 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
6138 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6139 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
6140 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6141 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
6143 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6167 Builder.restoreIP({Preheader, Preheader->
end()});
6170 Builder.CreateBr(CLI->
getExit());
6178 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
6186 "Expected unique undroppable user of outlined function");
6188 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
6190 "Expected outlined function call to be located in loop preheader");
6192 if (OutlinedFnCallInstruction->
arg_size() > 1)
6199 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
6201 for (
auto &ToBeDeletedItem : ToBeDeleted)
6202 ToBeDeletedItem->eraseFromParent();
6209 uint32_t SrcLocStrSize;
6213 case WorksharingLoopType::ForStaticLoop:
6214 Flag = OMP_IDENT_FLAG_WORK_LOOP;
6216 case WorksharingLoopType::DistributeStaticLoop:
6217 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
6219 case WorksharingLoopType::DistributeForStaticLoop:
6220 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
6225 auto OI = std::make_unique<OutlineInfo>();
6230 SmallVector<Instruction *, 4> ToBeDeleted;
6232 OI->OuterAllocBB = AllocaIP.getBlock();
6255 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
6257 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
6259 CodeExtractorAnalysisCache CEAC(*OuterFn);
6260 CodeExtractor Extractor(Blocks,
6274 SetVector<Value *> SinkingCands, HoistingCands;
6278 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
6285 for (
auto Use :
Users) {
6287 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
6288 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
6294 OI->ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6301 OI->PostOutlineCB = [=, ToBeDeletedVec =
6302 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6312 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6313 bool HasSimdModifier,
bool HasMonotonicModifier,
6314 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6316 Value *DistScheduleChunkSize) {
6317 if (
Config.isTargetDevice())
6318 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6320 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6321 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6323 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6324 OMPScheduleType::ModifierOrdered;
6326 if (HasDistSchedule) {
6327 DistScheduleSchedType = DistScheduleChunkSize
6328 ? OMPScheduleType::OrderedDistributeChunked
6329 : OMPScheduleType::OrderedDistribute;
6331 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6332 case OMPScheduleType::BaseStatic:
6333 case OMPScheduleType::BaseDistribute:
6334 assert((!ChunkSize || !DistScheduleChunkSize) &&
6335 "No chunk size with static-chunked schedule");
6336 if (IsOrdered && !HasDistSchedule)
6337 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6338 NeedsBarrier, ChunkSize);
6340 if (DistScheduleChunkSize)
6341 return applyStaticChunkedWorkshareLoop(
6342 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6343 DistScheduleChunkSize, DistScheduleSchedType);
6344 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6347 case OMPScheduleType::BaseStaticChunked:
6348 case OMPScheduleType::BaseDistributeChunked:
6349 if (IsOrdered && !HasDistSchedule)
6350 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6351 NeedsBarrier, ChunkSize);
6353 return applyStaticChunkedWorkshareLoop(
6354 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6355 DistScheduleChunkSize, DistScheduleSchedType);
6357 case OMPScheduleType::BaseRuntime:
6358 case OMPScheduleType::BaseAuto:
6359 case OMPScheduleType::BaseGreedy:
6360 case OMPScheduleType::BaseBalanced:
6361 case OMPScheduleType::BaseSteal:
6362 case OMPScheduleType::BaseRuntimeSimd:
6364 "schedule type does not support user-defined chunk sizes");
6366 case OMPScheduleType::BaseGuidedSimd:
6367 case OMPScheduleType::BaseDynamicChunked:
6368 case OMPScheduleType::BaseGuidedChunked:
6369 case OMPScheduleType::BaseGuidedIterativeChunked:
6370 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6371 case OMPScheduleType::BaseStaticBalancedChunked:
6372 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6373 NeedsBarrier, ChunkSize);
6386 unsigned Bitwidth = Ty->getIntegerBitWidth();
6389 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6392 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6400static FunctionCallee
6402 unsigned Bitwidth = Ty->getIntegerBitWidth();
6405 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6408 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6415static FunctionCallee
6417 unsigned Bitwidth = Ty->getIntegerBitWidth();
6420 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6423 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6428OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6431 bool NeedsBarrier,
Value *Chunk) {
6432 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6434 "Require dedicated allocate IP");
6436 "Require valid schedule type");
6438 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6439 OMPScheduleType::ModifierOrdered;
6444 uint32_t SrcLocStrSize;
6451 Type *IVTy =
IV->getType();
6456 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6458 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6459 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6460 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6461 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6470 Constant *One = ConstantInt::get(IVTy, 1);
6471 Builder.CreateStore(One, PLowerBound);
6473 Builder.CreateStore(UpperBound, PUpperBound);
6474 Builder.CreateStore(One, PStride);
6492 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6504 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6507 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6508 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6511 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6512 Builder.CreateCondBr(MoreWork, Header, Exit);
6518 PI->setIncomingBlock(0, OuterCond);
6519 PI->setIncomingValue(0, LowerBound);
6524 Br->setSuccessor(OuterCond);
6530 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6533 CI->setOperand(1, UpperBound);
6537 assert(BI->getSuccessor(1) == Exit);
6538 BI->setSuccessor(1, OuterCond);
6552 omp::Directive::OMPD_for,
false,
6555 return BarrierIP.takeError();
6607 assert(
Loops.size() >= 1 &&
"At least one loop required");
6608 size_t NumLoops =
Loops.size();
6612 return Loops.front();
6624 Loop->collectControlBlocks(OldControlBBs);
6628 if (ComputeIP.
isSet())
6635 Value *CollapsedTripCount =
nullptr;
6638 "All loops to collapse must be valid canonical loops");
6639 Value *OrigTripCount = L->getTripCount();
6640 if (!CollapsedTripCount) {
6641 CollapsedTripCount = OrigTripCount;
6646 CollapsedTripCount =
6647 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6653 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6659 Builder.restoreIP(Result->getBodyIP());
6661 Value *Leftover = Result->getIndVar();
6663 NewIndVars.
resize(NumLoops);
6664 for (
int i = NumLoops - 1; i >= 1; --i) {
6665 Value *OrigTripCount =
Loops[i]->getTripCount();
6667 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6668 NewIndVars[i] = NewIndVar;
6670 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6673 NewIndVars[0] = Leftover;
6682 BasicBlock *ContinueBlock = Result->getBody();
6684 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6691 ContinueBlock =
nullptr;
6692 ContinuePred = NextSrc;
6699 for (
size_t i = 0; i < NumLoops - 1; ++i)
6700 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6706 for (
size_t i = NumLoops - 1; i > 0; --i)
6707 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6710 ContinueWith(Result->getLatch(),
nullptr);
6717 for (
size_t i = 0; i < NumLoops; ++i)
6718 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6732std::vector<CanonicalLoopInfo *>
6736 "Must pass as many tile sizes as there are loops");
6737 int NumLoops =
Loops.size();
6738 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6750 Loop->collectControlBlocks(OldControlBBs);
6758 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6759 OrigTripCounts.
push_back(L->getTripCount());
6770 for (
int i = 0; i < NumLoops - 1; ++i) {
6783 for (
int i = 0; i < NumLoops; ++i) {
6785 Value *OrigTripCount = OrigTripCounts[i];
6798 Value *FloorTripOverflow =
6799 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6801 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6802 Value *FloorTripCount =
6803 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6804 "omp_floor" +
Twine(i) +
".tripcount",
true);
6807 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6813 std::vector<CanonicalLoopInfo *> Result;
6814 Result.reserve(NumLoops * 2);
6827 auto EmbeddNewLoop =
6828 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6831 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6836 Enter = EmbeddedLoop->
getBody();
6838 OutroInsertBefore = EmbeddedLoop->
getLatch();
6839 return EmbeddedLoop;
6843 const Twine &NameBase) {
6846 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6847 Result.push_back(EmbeddedLoop);
6851 EmbeddNewLoops(FloorCount,
"floor");
6857 for (
int i = 0; i < NumLoops; ++i) {
6861 Value *FloorIsEpilogue =
6863 Value *TileTripCount =
6870 EmbeddNewLoops(TileCounts,
"tile");
6875 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6884 BodyEnter =
nullptr;
6885 BodyEntered = ExitBB;
6897 Builder.restoreIP(Result.back()->getBodyIP());
6898 for (
int i = 0; i < NumLoops; ++i) {
6901 Value *OrigIndVar = OrigIndVars[i];
6929 if (Properties.
empty())
6952 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6956 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6964 if (
I.mayReadOrWriteMemory()) {
6968 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6982 Loop->collectControlBlocks(oldControlBBs);
6987 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6988 origTripCounts.
push_back(L->getTripCount());
6997 Builder.SetInsertPoint(TCBlock);
6998 Value *fusedTripCount =
nullptr;
7000 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
7001 Value *origTripCount = L->getTripCount();
7002 if (!fusedTripCount) {
7003 fusedTripCount = origTripCount;
7006 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
7007 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
7021 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7022 Loops[i]->getPreheader()->moveBefore(TCBlock);
7023 Loops[i]->getAfter()->moveBefore(TCBlock);
7027 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7039 for (
size_t i = 0; i <
Loops.size(); ++i) {
7041 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
7042 Builder.SetInsertPoint(condBlock);
7050 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7051 Builder.SetInsertPoint(condBBs[i]);
7052 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
7068 "omp.fused.pre_latch");
7101 const Twine &NamePrefix) {
7130 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
7132 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
7135 Builder.SetInsertPoint(SplitBeforeIt);
7137 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
7140 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
7143 Builder.SetInsertPoint(ElseBlock);
7149 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
7151 ExistingBlocks.
append(L->block_begin(), L->block_end());
7157 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
7159 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
7166 if (
Block == ThenBlock)
7167 NewBB->
setName(NamePrefix +
".if.else");
7170 VMap[
Block] = NewBB;
7178 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
7179 NamePrefix +
".pre_latch");
7183 L->addBasicBlockToLoop(ThenBlock, LI);
7189 if (TargetTriple.
isX86()) {
7190 if (Features.
lookup(
"avx512f"))
7192 else if (Features.
lookup(
"avx"))
7196 if (TargetTriple.
isPPC())
7198 if (TargetTriple.
isWasm())
7205 Value *IfCond, OrderKind Order,
7215 if (!BB.hasTerminator())
7231 I->eraseFromParent();
7234 if (AlignedVars.
size()) {
7236 for (
auto &AlignedItem : AlignedVars) {
7237 Value *AlignedPtr = AlignedItem.first;
7238 Value *Alignment = AlignedItem.second;
7241 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
7249 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
7262 Reachable.insert(
Block);
7272 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
7288 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
7290 if (Simdlen || Safelen) {
7294 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7320static std::unique_ptr<TargetMachine>
7324 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7325 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7336 std::nullopt, OptLevel));
7354 if (!BB.hasTerminator())
7367 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7368 FAM.registerPass([&]() {
return TIRA; });
7382 I->eraseFromParent();
7385 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7390 nullptr, ORE,
static_cast<int>(OptLevel),
7411 <<
" Threshold=" << UP.
Threshold <<
"\n"
7414 <<
" PartialOptSizeThreshold="
7434 Ptr = Load->getPointerOperand();
7436 Ptr = Store->getPointerOperand();
7443 if (Alloca->getParent() == &
F->getEntryBlock())
7463 int MaxTripCount = 0;
7464 bool MaxOrZero =
false;
7465 unsigned TripMultiple = 0;
7468 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7469 unsigned Factor = UP.
Count;
7470 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7481 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7497 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7510 *UnrolledCLI =
Loop;
7515 "unrolling only makes sense with a factor of 2 or larger");
7517 Type *IndVarTy =
Loop->getIndVarType();
7524 std::vector<CanonicalLoopInfo *>
LoopNest =
7539 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7542 (*UnrolledCLI)->assertOK();
7560 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7579 if (!CPVars.
empty()) {
7584 Directive OMPD = Directive::OMPD_single;
7589 Value *Args[] = {Ident, ThreadId};
7598 if (
Error Err = FiniCB(IP))
7619 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7626 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7629 ConstantInt::get(Int64, 0), CPVars[
I],
7632 }
else if (!IsNowait) {
7635 omp::Directive::OMPD_unknown,
false,
7653 Directive::OMPD_scope,
nullptr,
nullptr,
7654 BodyGenCB, FiniCB,
false,
true,
7662 omp::Directive::OMPD_unknown,
7678 Directive OMPD = Directive::OMPD_critical;
7683 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7684 Value *Args[] = {Ident, ThreadId, LockVar};
7701 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7709 const Twine &Name,
bool IsDependSource) {
7713 "OpenMP runtime requires depend vec with i64 type");
7726 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7740 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7758 Directive OMPD = Directive::OMPD_ordered;
7767 Value *Args[] = {Ident, ThreadId};
7777 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7784 bool HasFinalize,
bool IsCancellable) {
7791 BasicBlock *EntryBB = Builder.GetInsertBlock();
7800 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7812 "Unexpected control flow graph state!!");
7814 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7816 return AfterIP.takeError();
7821 "Unexpected Insertion point location!");
7824 auto InsertBB = merged ? ExitPredBB : ExitBB;
7827 Builder.SetInsertPoint(InsertBB);
7829 return Builder.saveIP();
7833 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7835 if (!Conditional || !EntryCall)
7841 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7851 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7855 UI->eraseFromParent();
7863 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7871 "Unexpected finalization stack state!");
7874 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7876 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7877 return std::move(Err);
7881 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7891 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7925 "copyin.not.master.end");
7932 Builder.SetInsertPoint(OMP_Entry);
7933 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7934 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7935 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7936 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7938 Builder.SetInsertPoint(CopyBegin);
7956 Value *Args[] = {ThreadId,
Size, Allocator};
7979 return Builder.CreateCall(Fn, Args, Name);
7993 Value *Args[] = {ThreadId, Addr, Allocator};
8000 const Twine &Name) {
8008 M.getContext(),
M.getDataLayout().getPrefTypeAlign(Int64)));
8014 const Twine &Name) {
8016 Loc,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)), Name);
8021 const Twine &Name) {
8027 return Builder.CreateCall(Fn, Args, Name);
8032 const Twine &Name) {
8034 Loc, Addr,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)),
8041 Value *DependenceAddress,
bool HaveNowaitClause) {
8049 if (Device ==
nullptr)
8051 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
8052 if (NumDependences ==
nullptr) {
8053 NumDependences = ConstantInt::get(Int32, 0);
8057 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8059 Ident, ThreadId, InteropVar, InteropTypeVal,
8060 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
8069 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
8077 if (Device ==
nullptr)
8079 if (NumDependences ==
nullptr) {
8080 NumDependences = ConstantInt::get(Int32, 0);
8084 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8086 Ident, ThreadId, InteropVar, Device,
8087 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8096 Value *NumDependences,
8097 Value *DependenceAddress,
8098 bool HaveNowaitClause) {
8105 if (Device ==
nullptr)
8107 if (NumDependences ==
nullptr) {
8108 NumDependences = ConstantInt::get(Int32, 0);
8112 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8114 Ident, ThreadId, InteropVar, Device,
8115 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8145 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
8146 "expected num_threads and num_teams to be specified");
8166 const std::string DebugPrefix =
"_debug__";
8167 if (KernelName.
ends_with(DebugPrefix)) {
8168 KernelName = KernelName.
drop_back(DebugPrefix.length());
8169 Kernel =
M.getFunction(KernelName);
8175 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
8180 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
8187 MaxThreadsVal = Attrs.MinThreads;
8191 if (MaxThreadsVal > 0)
8204 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
8207 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
8208 Constant *DynamicEnvironmentInitializer =
8212 DynamicEnvironmentInitializer, DynamicEnvironmentName,
8214 DL.getDefaultGlobalsAddressSpace());
8218 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
8219 ? DynamicEnvironmentGV
8221 DynamicEnvironmentPtr);
8224 ConfigurationEnvironment, {
8225 UseGenericStateMachineVal,
8226 MayUseNestedParallelismVal,
8233 ReductionBufferLength,
8236 KernelEnvironment, {
8237 ConfigurationEnvironmentInitializer,
8241 std::string KernelEnvironmentName =
8242 (KernelName +
"_kernel_environment").str();
8245 KernelEnvironmentInitializer, KernelEnvironmentName,
8247 DL.getDefaultGlobalsAddressSpace());
8251 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
8252 ? KernelEnvironmentGV
8254 KernelEnvironmentPtr);
8255 Value *KernelLaunchEnvironment =
8258 KernelLaunchEnvironment =
8259 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
8260 ? KernelLaunchEnvironment
8261 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
8262 KernelLaunchEnvParamTy);
8264 Fn, {KernelEnvironment, KernelLaunchEnvironment});
8276 auto *UI =
Builder.CreateUnreachable();
8282 Builder.SetInsertPoint(WorkerExitBB);
8286 Builder.SetInsertPoint(CheckBBTI);
8287 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
8289 CheckBBTI->eraseFromParent();
8290 UI->eraseFromParent();
8298 int32_t TeamsReductionDataSize,
8299 int32_t TeamsReductionBufferLength) {
8304 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
8308 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
8314 const std::string DebugPrefix =
"_debug__";
8316 KernelName = KernelName.
drop_back(DebugPrefix.length());
8317 auto *KernelEnvironmentGV =
8318 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
8319 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
8320 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
8322 KernelEnvironmentInitializer,
8323 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
8325 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
8327 KernelEnvironmentGV->setInitializer(NewInitializer);
8332 if (
Kernel.hasFnAttribute(Name)) {
8333 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
8339std::pair<int32_t, int32_t>
8341 int32_t ThreadLimit =
8342 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
8345 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
8346 if (!Attr.isValid() || !Attr.isStringAttribute())
8347 return {0, ThreadLimit};
8348 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
8351 return {0, ThreadLimit};
8352 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
8360 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
8362 return {0, ThreadLimit};
8368 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
8371 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
8379std::pair<int32_t, int32_t>
8382 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
8386 int32_t LB, int32_t UB) {
8394 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
8397void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
8406 else if (
T.isNVPTX())
8408 else if (
T.isSPIRV())
8413Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8414 StringRef EntryFnIDName) {
8415 if (
Config.isTargetDevice()) {
8416 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8420 return new GlobalVariable(
8425Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8426 StringRef EntryFnName) {
8430 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8431 "Named kernel already exists?");
8432 return new GlobalVariable(
8445 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8449 OutlinedFn = *CBResult;
8451 OutlinedFn =
nullptr;
8457 if (!IsOffloadEntry)
8460 std::string EntryFnIDName =
8462 ? std::string(EntryFnName)
8466 EntryFnName, EntryFnIDName);
8474 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8475 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8476 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8478 EntryInfo, EntryAddr, OutlinedFnID,
8480 return OutlinedFnID;
8498 bool IsStandAlone = !BodyGenCB;
8505 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8507 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8508 true, DeviceAddrCB))
8515 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8525 SrcLocInfo, DeviceID,
8532 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8536 if (Info.HasNoWait) {
8546 if (Info.HasNoWait) {
8550 emitBlock(OffloadContBlock, CurFn,
true);
8556 bool RequiresOuterTargetTask = Info.HasNoWait;
8557 if (!RequiresOuterTargetTask)
8558 cantFail(TaskBodyCB(
nullptr,
nullptr,
8562 {}, RTArgs, Info.HasNoWait));
8565 omp::OMPRTL___tgt_target_data_begin_mapper);
8569 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8573 Builder.CreateStore(LI, DeviceMap.second.second);
8610 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8619 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8642 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8643 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8658 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8659 return EndThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8662 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8663 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8674 bool IsGPUDistribute) {
8675 assert((IVSize == 32 || IVSize == 64) &&
8676 "IV size is not compatible with the omp runtime");
8678 if (IsGPUDistribute)
8680 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8681 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8682 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8683 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8685 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8686 : omp::OMPRTL___kmpc_for_static_init_4u)
8687 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8688 : omp::OMPRTL___kmpc_for_static_init_8u);
8695 assert((IVSize == 32 || IVSize == 64) &&
8696 "IV size is not compatible with the omp runtime");
8698 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8699 : omp::OMPRTL___kmpc_dispatch_init_4u)
8700 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8701 : omp::OMPRTL___kmpc_dispatch_init_8u);
8708 assert((IVSize == 32 || IVSize == 64) &&
8709 "IV size is not compatible with the omp runtime");
8711 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8712 : omp::OMPRTL___kmpc_dispatch_next_4u)
8713 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8714 : omp::OMPRTL___kmpc_dispatch_next_8u);
8721 assert((IVSize == 32 || IVSize == 64) &&
8722 "IV size is not compatible with the omp runtime");
8724 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8725 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8726 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8727 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8738 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8746 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8750 if (NewVar && (arg == NewVar->
getArg()))
8760 auto UpdateDebugRecord = [&](
auto *DR) {
8763 for (
auto Loc : DR->location_ops()) {
8764 auto Iter = ValueReplacementMap.find(
Loc);
8765 if (Iter != ValueReplacementMap.end()) {
8766 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8767 ArgNo = std::get<1>(Iter->second) + 1;
8771 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8776 if (DVR->getNumVariableLocationOps() != 1u) {
8777 DVR->setKillLocation();
8780 Value *
Loc = DVR->getVariableLocationOp(0u);
8787 RequiredBB = &DVR->getFunction()->getEntryBlock();
8789 if (RequiredBB && RequiredBB != CurBB) {
8801 "Unexpected debug intrinsic");
8803 UpdateDebugRecord(&DVR);
8804 MoveDebugRecordToCorrectBlock(&DVR);
8807 for (
auto *DVR : DVRsToDelete)
8808 DVR->getMarker()->MarkedInstr->dropOneDbgRecord(DVR);
8812 Module *M = Func->getParent();
8815 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8816 unsigned ArgNo = Func->arg_size();
8818 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8819 false, DINode::DIFlags::FlagArtificial);
8821 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8822 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8843 for (
auto &Arg : Inputs)
8844 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8848 for (
auto &Arg : Inputs)
8849 ParameterTypes.
push_back(Arg->getType());
8857 auto BB = Builder.GetInsertBlock();
8858 auto M = BB->getModule();
8869 if (TargetCpuAttr.isStringAttribute())
8870 Func->addFnAttr(TargetCpuAttr);
8872 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8873 if (TargetFeaturesAttr.isStringAttribute())
8874 Func->addFnAttr(TargetFeaturesAttr);
8879 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8890 Builder.SetInsertPoint(EntryBB);
8896 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8906 splitBB(Builder,
true,
"outlined.body");
8913 Builder.SetInsertPoint(ExitBB);
8920 Builder.CreateRetVoid();
8924 auto AllocaIP = Builder.saveIP();
8929 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
8961 if (Instr->getFunction() == Func)
8962 Instr->replaceUsesOfWith(
Input, InputCopy);
8968 for (
auto InArg :
zip(Inputs, ArgRange)) {
8970 Argument &Arg = std::get<1>(InArg);
8971 Value *InputCopy =
nullptr;
8974 Arg,
Input, InputCopy, AllocaIP, Builder.saveIP(),
8978 Builder.restoreIP(*AfterIP);
8979 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8999 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
9006 ReplaceValue(
Input, InputCopy, Func);
9010 for (
auto Deferred : DeferredReplacement)
9011 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
9014 ValueReplacementMap);
9022 Value *TaskWithPrivates,
9023 Type *TaskWithPrivatesTy) {
9025 Type *TaskTy = OMPIRBuilder.Task;
9028 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
9029 Value *Shareds = TaskT;
9039 if (TaskWithPrivatesTy != TaskTy)
9040 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
9057 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
9062 assert((!NumOffloadingArrays || PrivatesTy) &&
9063 "PrivatesTy cannot be nullptr when there are offloadingArrays"
9096 Type *TaskPtrTy = OMPBuilder.TaskPtr;
9097 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
9103 ".omp_target_task_proxy_func",
9104 Builder.GetInsertBlock()->getModule());
9105 Value *ThreadId = ProxyFn->getArg(0);
9106 Value *TaskWithPrivates = ProxyFn->getArg(1);
9107 ThreadId->
setName(
"thread.id");
9108 TaskWithPrivates->
setName(
"task");
9110 bool HasShareds = SharedArgsOperandNo > 0;
9111 bool HasOffloadingArrays = NumOffloadingArrays > 0;
9114 Builder.SetInsertPoint(EntryBB);
9120 if (HasOffloadingArrays) {
9121 assert(TaskTy != TaskWithPrivatesTy &&
9122 "If there are offloading arrays to pass to the target"
9123 "TaskTy cannot be the same as TaskWithPrivatesTy");
9126 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
9127 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
9129 Builder.CreateStructGEP(PrivatesTy, Privates, i));
9133 auto *ArgStructAlloca =
9135 assert(ArgStructAlloca &&
9136 "Unable to find the alloca instruction corresponding to arguments "
9137 "for extracted function");
9139 std::optional<TypeSize> ArgAllocSize =
9141 assert(ArgStructType && ArgAllocSize &&
9142 "Unable to determine size of arguments for extracted function");
9143 uint64_t StructSize = ArgAllocSize->getFixedValue();
9146 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
9148 Value *SharedsSize = Builder.getInt64(StructSize);
9151 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
9153 Builder.CreateMemCpy(
9154 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
9156 KernelLaunchArgs.
push_back(NewArgStructAlloca);
9159 Builder.CreateRetVoid();
9165 return GEP->getSourceElementType();
9167 return Alloca->getAllocatedType();
9190 if (OffloadingArraysToPrivatize.
empty())
9191 return OMPIRBuilder.Task;
9194 for (
Value *V : OffloadingArraysToPrivatize) {
9195 assert(V->getType()->isPointerTy() &&
9196 "Expected pointer to array to privatize. Got a non-pointer value "
9199 assert(ArrayTy &&
"ArrayType cannot be nullptr");
9205 "struct.task_with_privates");
9219 EntryFnName, Inputs, CBFunc,
9224 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
9361 TargetTaskAllocaBB->
begin());
9364 auto OI = std::make_unique<OutlineInfo>();
9365 OI->EntryBB = TargetTaskAllocaBB;
9366 OI->OuterAllocBB = AllocaIP.
getBlock();
9371 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
9374 Builder.restoreIP(TargetTaskBodyIP);
9375 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
9393 bool NeedsTargetTask = HasNoWait && DeviceID;
9394 if (NeedsTargetTask) {
9400 OffloadingArraysToPrivatize.
push_back(V);
9401 OI->ExcludeArgsFromAggregate.push_back(V);
9405 OI->PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
9406 DeviceID, OffloadingArraysToPrivatize](
9409 "there must be a single user for the outlined function");
9423 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
9424 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
9426 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
9427 "Wrong number of arguments for StaleCI when shareds are present");
9428 int SharedArgOperandNo =
9429 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
9435 if (!OffloadingArraysToPrivatize.
empty())
9440 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
9441 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
9443 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9446 Builder.SetInsertPoint(StaleCI);
9463 OMPRTL___kmpc_omp_target_task_alloc);
9475 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9482 auto *ArgStructAlloca =
9484 assert(ArgStructAlloca &&
9485 "Unable to find the alloca instruction corresponding to arguments "
9486 "for extracted function");
9487 std::optional<TypeSize> ArgAllocSize =
9490 "Unable to determine size of arguments for extracted function");
9491 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9510 TaskSize, SharedsSize,
9513 if (NeedsTargetTask) {
9514 assert(DeviceID &&
"Expected non-empty device ID.");
9524 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9525 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9528 if (!OffloadingArraysToPrivatize.
empty()) {
9530 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9531 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9532 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9539 "ElementType should match ArrayType");
9542 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9544 Dst, Alignment, PtrToPrivatize, Alignment,
9545 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9549 Value *DepArray =
nullptr;
9550 Value *NumDeps =
nullptr;
9553 NumDeps = Dependencies.
NumDeps;
9554 }
else if (!Dependencies.
Deps.empty()) {
9556 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
9567 if (!NeedsTargetTask) {
9576 ConstantInt::get(
Builder.getInt32Ty(), 0),
9589 }
else if (DepArray) {
9597 {Ident, ThreadID, TaskData, NumDeps, DepArray,
9598 ConstantInt::get(
Builder.getInt32Ty(), 0),
9608 I->eraseFromParent();
9613 << *(
Builder.GetInsertBlock()) <<
"\n");
9615 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9627 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9650 Builder.restoreIP(IP);
9656 return Builder.saveIP();
9659 bool HasDependencies = !Dependencies.
empty();
9660 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9677 if (OutlinedFnID && DeviceID)
9679 EmitTargetCallFallbackCB, KArgs,
9680 DeviceID, RTLoc, TargetTaskAllocaIP);
9688 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9695 auto &&EmitTargetCallElse =
9702 if (RequiresOuterTargetTask) {
9709 Dependencies, EmptyRTArgs, HasNoWait);
9711 return EmitTargetCallFallbackCB(Builder.saveIP());
9714 Builder.restoreIP(AfterIP);
9718 auto &&EmitTargetCallThen =
9722 Info.HasNoWait = HasNoWait;
9727 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9733 for (
auto [DefaultVal, RuntimeVal] :
9735 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9736 : Builder.getInt32(DefaultVal));
9740 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9742 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9746 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9749 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9757 Value *MaxThreadsClause =
9759 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9762 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9764 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9765 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9767 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9768 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9770 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9773 unsigned NumTargetItems = Info.NumberOfPtrs;
9781 Builder.getInt64Ty(),
9783 : Builder.getInt64(0);
9787 DynCGroupMem = Builder.getInt32(0);
9790 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9791 HasNoWait,
false, DynCGroupMemFallback);
9798 if (RequiresOuterTargetTask)
9800 RTLoc, AllocaIP, Dependencies,
9801 KArgs.
RTArgs, Info.HasNoWait);
9804 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9805 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9808 Builder.restoreIP(AfterIP);
9815 if (!OutlinedFnID) {
9816 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP(), DeallocBlocks));
9822 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP(), DeallocBlocks));
9827 EmitTargetCallElse, AllocaIP));
9840 bool HasNowait,
Value *DynCGroupMem,
9854 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9855 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9861 if (!
Config.isTargetDevice())
9863 RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs,
9864 GenMapInfoCB, CustomMapperCB, Dependencies, HasNowait,
9865 DynCGroupMem, DynCGroupMemFallback);
9879 return OS.
str().str();
9884 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9890 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9892 assert(Elem.second->getValueType() == Ty &&
9893 "OMP internal variable has different type than requested");
9906 :
M.getTargetTriple().isAMDGPU()
9908 :
DL.getDefaultGlobalsAddressSpace();
9917 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9918 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9925Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9926 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9927 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9938 return SizePtrToInt;
9943 std::string VarName) {
9951 return MaptypesArrayGlobal;
9956 unsigned NumOperands,
9965 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9969 ArrI64Ty,
nullptr,
".offload_sizes");
9980 int64_t DeviceID,
unsigned NumOperands) {
9986 Value *ArgsBaseGEP =
9988 {Builder.getInt32(0), Builder.getInt32(0)});
9991 {Builder.getInt32(0), Builder.getInt32(0)});
9992 Value *ArgSizesGEP =
9994 {Builder.getInt32(0), Builder.getInt32(0)});
9998 Builder.getInt32(NumOperands),
9999 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
10000 MaptypesArg, MapnamesArg, NullPtr});
10007 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
10008 "expected region end call to runtime only when end call is separate");
10010 auto VoidPtrTy = UnqualPtrTy;
10011 auto VoidPtrPtrTy = UnqualPtrTy;
10013 auto Int64PtrTy = UnqualPtrTy;
10015 if (!Info.NumberOfPtrs) {
10027 Info.RTArgs.BasePointersArray,
10030 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
10034 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10038 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
10039 : Info.RTArgs.MapTypesArray,
10045 if (!Info.EmitDebug)
10049 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
10054 if (!Info.HasMapper)
10058 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
10079 "struct.descriptor_dim");
10081 enum { OffsetFD = 0, CountFD, StrideFD };
10085 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
10088 if (NonContigInfo.
Dims[
I] == 1)
10093 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
10094 Builder.restoreIP(CodeGenIP);
10095 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
10096 unsigned RevIdx = EE -
II - 1;
// Compute the addresses of the offset, count and stride fields of one DimTy
// element (field indices OffsetFD, CountFD, StrideFD) and pass the matching
// NonContigInfo entry for dimension RevIdx together with an alignment taken
// from the data layout. The heads of the calls that consume these arguments
// are not visible in this excerpt.
10100 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
10102 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
10103 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
// Same pattern for the count field.
10105 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
10107 NonContigInfo.
Counts[L][RevIdx], CountLVal,
10108 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
// Same pattern for the stride field.
10110 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
10112 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
// NOTE(review): the alignment here is derived from CountLVal's type, not
// StrideLVal's. Both values come from CreateStructGEP so the types may well
// be identical, but this looks like a copy/paste slip -- confirm.
10113 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10116 Builder.restoreIP(CodeGenIP);
10117 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
10118 DimsAddr,
Builder.getPtrTy());
10121 Info.RTArgs.PointersArray, 0,
I);
10123 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
10128void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
10132 StringRef Prefix = IsInit ?
".init" :
".del";
10138 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
10139 Value *DeleteBit = Builder.CreateAnd(
10142 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10143 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
10148 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
10149 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
10150 DeleteCond = Builder.CreateIsNull(
10155 DeleteCond =
Builder.CreateIsNotNull(
10171 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10172 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10173 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10174 MapTypeArg =
Builder.CreateOr(
10177 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10178 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
10182 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
10183 ArraySize, MapTypeArg, MapName};
10194 bool PreserveMemberOfFlags) {
10210 MapperFn->
addFnAttr(Attribute::NoInline);
10211 MapperFn->
addFnAttr(Attribute::NoUnwind);
10221 auto SavedIP =
Builder.saveIP();
10222 Builder.SetInsertPoint(EntryBB);
10234 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
10236 Value *PtrBegin = BeginIn;
10242 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10243 MapType, MapName, ElementSize, HeadBB,
10254 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
10255 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10261 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
10262 PtrPHI->addIncoming(PtrBegin, HeadBB);
10267 return Info.takeError();
10271 Value *OffloadingArgs[] = {MapperHandle};
10275 Value *ShiftedPreviousSize =
10279 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
10280 Value *CurBaseArg = Info->BasePointers[
I];
10281 Value *CurBeginArg = Info->Pointers[
I];
10282 Value *CurSizeArg = Info->Sizes[
I];
10283 Value *CurNameArg = Info->Names.size()
10289 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10291 Value *MemberMapType;
10292 if (PreserveMemberOfFlags) {
10294 static_cast<uint64_t>(OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
10296 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10298 bool HasMemberOf = (OrigFlags & MemberOfMask) != 0;
10300 MemberMapType =
Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10302 MemberMapType = OriMapType;
10304 MemberMapType =
Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10322 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10323 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10324 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10334 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10340 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10341 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10342 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10348 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10349 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10350 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10356 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10357 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10363 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10364 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10365 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10371 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10372 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10381 CurMapType->
addIncoming(MemberMapType, ToElseBB);
10383 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
10384 CurSizeArg, CurMapType, CurNameArg};
10386 auto ChildMapperFn = CustomMapperCB(
I);
10387 if (!ChildMapperFn)
10388 return ChildMapperFn.takeError();
10389 if (*ChildMapperFn) {
10404 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
10405 "omp.arraymap.next");
10406 PtrPHI->addIncoming(PtrNext, LastBB);
10407 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
10409 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10414 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10415 MapType, MapName, ElementSize, DoneBB,
10429 bool IsNonContiguous,
10433 Info.clearArrayInfo();
10436 if (Info.NumberOfPtrs == 0)
10445 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
10446 PointerArrayType,
nullptr,
".offload_baseptrs");
10448 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
10449 PointerArrayType,
nullptr,
".offload_ptrs");
10451 PointerArrayType,
nullptr,
".offload_mappers");
10452 Info.RTArgs.MappersArray = MappersArray;
10459 ConstantInt::get(Int64Ty, 0));
10461 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
10462 bool IsNonContigEntry =
10464 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10466 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10469 if (IsNonContigEntry) {
10471 "Index must be in-bounds for NON_CONTIG Dims array");
10473 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10474 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10479 ConstSizes[
I] = CI;
10483 RuntimeSizes.
set(
I);
10486 if (RuntimeSizes.
all()) {
10488 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10489 SizeArrayType,
nullptr,
".offload_sizes");
10495 auto *SizesArrayGbl =
10500 if (!RuntimeSizes.
any()) {
10501 Info.RTArgs.SizesArray = SizesArrayGbl;
10503 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10504 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10507 SizeArrayType,
nullptr,
".offload_sizes");
10511 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10512 SizesArrayGbl, OffloadSizeAlign,
10517 Info.RTArgs.SizesArray = Buffer;
10525 for (
auto mapFlag : CombinedInfo.
Types)
10527 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10531 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10537 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10538 Info.EmitDebug =
true;
10540 Info.RTArgs.MapNamesArray =
10542 Info.EmitDebug =
false;
10547 if (Info.separateBeginEndCalls()) {
10548 bool EndMapTypesDiffer =
false;
10550 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10551 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10552 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10553 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10554 EndMapTypesDiffer =
true;
10557 if (EndMapTypesDiffer) {
10559 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10564 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10567 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10569 Builder.CreateAlignedStore(BPVal, BP,
10570 M.getDataLayout().getPrefTypeAlign(PtrTy));
10572 if (Info.requiresDevicePointerInfo()) {
10574 CodeGenIP =
Builder.saveIP();
10576 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10577 Builder.restoreIP(CodeGenIP);
10579 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10581 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10583 DeviceAddrCB(
I, BP);
10589 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10592 Builder.CreateAlignedStore(PVal,
P,
10593 M.getDataLayout().getPrefTypeAlign(PtrTy));
10595 if (RuntimeSizes.
test(
I)) {
10597 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10603 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10606 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10609 auto CustomMFunc = CustomMapperCB(
I);
10611 return CustomMFunc.takeError();
10613 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10616 PointerArrayType, MappersArray,
10619 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10623 Info.NumberOfPtrs == 0)
10640 Builder.ClearInsertionPoint();
10671 auto CondConstant = CI->getSExtValue();
10673 return ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10675 return ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10685 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10688 if (
Error Err = ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10694 if (
Error Err = ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10703bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10707 "Unexpected Atomic Ordering.");
10709 bool Flush =
false;
10771 assert(
X.Var->getType()->isPointerTy() &&
10772 "OMP Atomic expects a pointer to target memory");
10773 Type *XElemTy =
X.ElemTy;
10776 "OMP atomic read expected a scalar type");
10778 Value *XRead =
nullptr;
10782 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10791 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10794 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10796 XRead = AtomicLoadRes.first;
10803 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10806 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10808 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10811 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10812 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10823 assert(
X.Var->getType()->isPointerTy() &&
10824 "OMP Atomic expects a pointer to target memory");
10825 Type *XElemTy =
X.ElemTy;
10828 "OMP atomic write expected a scalar type");
10836 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10839 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10847 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10852 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10859 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10860 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10866 Type *XTy =
X.Var->getType();
10868 "OMP Atomic expects a pointer to target memory");
10869 Type *XElemTy =
X.ElemTy;
10872 "OMP atomic update expected a scalar or struct type");
10875 "OpenMP atomic does not support LT or GT operations");
10879 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10880 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10882 return AtomicResult.takeError();
10883 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
// Emits an ordinary IRBuilder instruction that combines Src1 and Src2 and
// returns the resulting value. Each visible return builds one operation; the
// remaining parameters and the labels that select between the returns are not
// visible in this excerpt.
10888Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10892 return Builder.CreateAdd(Src1, Src2);
10894 return Builder.CreateSub(Src1, Src2);
10896 return Builder.CreateAnd(Src1, Src2);
// NOTE(review): this arm applies CreateNeg (arithmetic negation) to the AND
// result. If it is meant to mirror an atomic NAND, i.e. ~(Src1 & Src2), a
// bitwise complement (CreateNot) would be expected instead -- confirm against
// the case label, which is not visible here.
10898 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10900 return Builder.CreateOr(Src1, Src2);
10902 return Builder.CreateXor(Src1, Src2);
10941Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10944 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10945 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10947 bool emitRMWOp =
false;
10955 emitRMWOp = XElemTy;
10958 emitRMWOp = (IsXBinopExpr && XElemTy);
10965 std::pair<Value *, Value *> Res;
10967 AtomicRMWInst *RMWInst =
10968 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10969 if (
T.isAMDGPU()) {
10970 if (IsIgnoreDenormalMode)
10971 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10973 if (!IsFineGrainedMemory)
10974 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10976 if (!IsRemoteMemory)
10980 Res.first = RMWInst;
10985 Res.second = Res.first;
10987 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10990 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10996 OpenMPIRBuilder::AtomicInfo atomicInfo(
10998 OldVal->
getAlign(),
true , AllocaIP,
X);
10999 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
11002 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11009 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
11010 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
11011 Builder.SetInsertPoint(ContBB);
11013 PHI->addIncoming(AtomicLoadRes.first, CurBB);
11015 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
11018 Value *Upd = *CBResult;
11019 Builder.CreateStore(Upd, NewAtomicAddr);
11022 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
11023 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
11024 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
11025 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
11028 Res.first = OldExprVal;
11031 if (UnreachableInst *ExitTI =
11034 Builder.SetInsertPoint(ExitBB);
11036 Builder.SetInsertPoint(ExitTI);
11039 IntegerType *IntCastTy =
11042 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
11052 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11059 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
11060 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
11061 Builder.SetInsertPoint(ContBB);
11063 PHI->addIncoming(OldVal, CurBB);
11068 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
11069 X->getName() +
".atomic.fltCast");
11071 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
11072 X->getName() +
".atomic.ptrCast");
11076 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
11079 Value *Upd = *CBResult;
11080 Builder.CreateStore(Upd, NewAtomicAddr);
11081 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
11085 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
11086 Result->setVolatile(VolatileX);
11087 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
11088 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
11089 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
11090 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
11092 Res.first = OldExprVal;
11096 if (UnreachableInst *ExitTI =
11099 Builder.SetInsertPoint(ExitBB);
11101 Builder.SetInsertPoint(ExitTI);
11112 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
11113 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
11118 Type *XTy =
X.Var->getType();
11120 "OMP Atomic expects a pointer to target memory");
11121 Type *XElemTy =
X.ElemTy;
11124 "OMP atomic capture expected a scalar or struct type");
11126 "OpenMP atomic does not support LT or GT operations");
11133 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
11134 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
11137 Value *CapturedVal =
11138 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
11139 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
11141 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
11153 IsPostfixUpdate, IsFailOnly, Failure);
11165 assert(
X.Var->getType()->isPointerTy() &&
11166 "OMP atomic expects a pointer to target memory");
11169 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
11170 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
11173 bool IsInteger = E->getType()->isIntegerTy();
11175 if (
Op == OMPAtomicCompareOp::EQ) {
11178 Value *OldValue =
nullptr;
11179 Value *SuccessOrFail =
nullptr;
11217 X.Var->getName() +
".atomic.load");
11223 Value *EIsNaN =
Builder.CreateFCmpUNO(E, E,
"atomic.e.isnan");
11224 Value *XIsNaN =
Builder.CreateFCmpUNO(XFP, XFP,
"atomic.x.isnan");
11225 Value *EitherNaN =
Builder.CreateOr(EIsNaN, XIsNaN,
"atomic.either.nan");
11230 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11234 M.getContext(),
X.Var->getName() +
".atomic.nan",
F, ExitBB);
11236 M.getContext(),
X.Var->getName() +
".atomic.notnan",
F, ExitBB);
11238 M.getContext(),
X.Var->getName() +
".atomic.zero",
F, ExitBB);
11240 M.getContext(),
X.Var->getName() +
".atomic.normal",
F, ExitBB);
11244 Builder.SetInsertPoint(CurBB);
11245 Builder.CreateCondBr(EitherNaN, NaNBB, NotNaNBB);
11248 Builder.SetInsertPoint(NaNBB);
11252 Builder.SetInsertPoint(NotNaNBB);
11255 X.Var->getName() +
".atomic.xiszero");
11257 "atomic.e.iszero");
11258 Value *BothZero =
Builder.CreateAnd(XIsZero, EIsZero,
"atomic.both.zero");
11259 Builder.CreateCondBr(BothZero, ZeroBB, NormalBB);
11262 Builder.SetInsertPoint(ZeroBB);
11264 X.Var, XCurr, DBCast,
MaybeAlign(), AO, Failure);
11265 Value *OldZero =
Builder.CreateExtractValue(ResZero, 0);
11266 Value *OkZero =
Builder.CreateExtractValue(ResZero, 1);
11270 Builder.SetInsertPoint(NormalBB);
11272 X.Var, EBCast, DBCast,
MaybeAlign(), AO, Failure);
11273 Value *OldNormal =
Builder.CreateExtractValue(ResNormal, 0);
11274 Value *OkNormal =
Builder.CreateExtractValue(ResNormal, 1);
11280 Builder.CreatePHI(IntCastTy, 3,
X.Var->getName() +
".atomic.old");
11285 X.Var->getName() +
".atomic.ok");
11292 Builder.SetInsertPoint(ExitBB);
11297 OldValue =
Builder.CreateBitCast(OldIntPHI,
X.ElemTy,
11298 X.Var->getName() +
".atomic.old.fp");
11299 SuccessOrFail = SuccessPHI;
11307 Result =
Builder.CreateAtomicCmpXchg(
X.Var, EBCast, DBCast,
11315 OldValue =
Builder.CreateExtractValue(Result, 0);
11317 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
11319 "OldValue and V must be of same type");
11320 if (IsPostfixUpdate) {
11321 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
11323 SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
11327 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11329 CurBBTI,
X.Var->getName() +
".atomic.exit");
11335 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
11337 Builder.SetInsertPoint(ContBB);
11338 Builder.CreateStore(OldValue, V.Var);
11344 Builder.SetInsertPoint(ExitBB);
11346 Builder.SetInsertPoint(ExitTI);
11349 Value *CapturedValue =
11350 Builder.CreateSelect(SuccessOrFail, E, OldValue);
11351 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11357 assert(R.Var->getType()->isPointerTy() &&
11358 "r.var must be of pointer type");
11359 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
11361 Value *SuccessFailureVal =
11362 Builder.CreateExtractValue(Result, 1);
11363 Value *ResultCast =
11364 R.IsSigned ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
11365 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
11366 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
11375 "OldValue and V must be of same type");
11376 if (IsPostfixUpdate) {
11377 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
11382 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11384 CurBBTI,
X.Var->getName() +
".atomic.exit");
11390 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
11392 Builder.SetInsertPoint(ContBB);
11393 Builder.CreateStore(OldValue, V.Var);
11399 Builder.SetInsertPoint(ExitBB);
11401 Builder.SetInsertPoint(ExitTI);
11404 Value *CapturedValue =
11405 Builder.CreateSelect(SuccessOrFail, E, OldValue);
11406 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11412 assert(R.Var->getType()->isPointerTy() &&
11413 "r.var must be of pointer type");
11414 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
11416 Value *ResultCast = R.IsSigned
11417 ?
Builder.CreateSExt(SuccessOrFail, R.ElemTy)
11418 :
Builder.CreateZExt(SuccessOrFail, R.ElemTy);
11419 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
11423 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
11424 "Op should be either max or min at this point");
11425 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
11436 if (IsXBinopExpr) {
11465 Value *CapturedValue =
nullptr;
11466 if (IsPostfixUpdate) {
11467 CapturedValue = OldValue;
11492 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
11493 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
11495 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11499 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
11519 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
11546 bool SubClausesPresent =
11547 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
11549 if (!
Config.isTargetDevice() && SubClausesPresent) {
11550 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
11551 "if lowerbound is non-null, then upperbound must also be non-null "
11552 "for bounds on num_teams");
11554 if (NumTeamsUpper ==
nullptr)
11555 NumTeamsUpper =
Builder.getInt32(0);
11557 if (NumTeamsLower ==
nullptr)
11558 NumTeamsLower = NumTeamsUpper;
11562 "argument to if clause must be an integer value");
11566 IfExpr =
Builder.CreateICmpNE(IfExpr,
11567 ConstantInt::get(IfExpr->
getType(), 0));
11568 NumTeamsUpper =
Builder.CreateSelect(
11569 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
11572 NumTeamsLower =
Builder.CreateSelect(
11573 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
11576 if (ThreadLimit ==
nullptr)
11577 ThreadLimit =
Builder.getInt32(0);
11581 Value *NumTeamsLowerInt32 =
11583 Value *NumTeamsUpperInt32 =
11585 Value *ThreadLimitInt32 =
11592 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
11593 ThreadLimitInt32});
11598 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11601 auto OI = std::make_unique<OutlineInfo>();
11602 OI->EntryBB = AllocaBB;
11603 OI->ExitBB = ExitBB;
11604 OI->OuterAllocBB = &OuterAllocaBB;
11610 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
11612 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
11614 auto HostPostOutlineCB = [
this, Ident,
11615 ToBeDeleted](
Function &OutlinedFn)
mutable {
11620 "there must be a single user for the outlined function");
11625 "Outlined function must have two or three arguments only");
11627 bool HasShared = OutlinedFn.
arg_size() == 3;
11635 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
11636 "outlined function.");
11637 Builder.SetInsertPoint(StaleCI);
11644 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
11648 I->eraseFromParent();
11651 if (!
Config.isTargetDevice())
11652 OI->PostOutlineCB = HostPostOutlineCB;
11656 Builder.SetInsertPoint(ExitBB);
11669 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11684 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11689 if (
Config.isTargetDevice()) {
11690 auto OI = std::make_unique<OutlineInfo>();
11691 OI->OuterAllocBB = OuterAllocIP.
getBlock();
11692 OI->EntryBB = AllocaBB;
11693 OI->ExitBB = ExitBB;
11694 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
11695 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
11699 Builder.SetInsertPoint(ExitBB);
11706 std::string VarName) {
11715 return MapNamesArrayGlobal;
// Defines the helper macros below and then includes OMPKinds.def, which
// presumably expands them once per declared OpenMP type (verify against the
// .def file). Some lines of this function, including part of the
// OMP_STRUCT_TYPE body, are not visible in this excerpt.
11720void OpenMPIRBuilder::initializeTypes(
Module &M) {
// Address space the module's data layout reports for program (code) memory;
// used for the function pointer types created by OMP_FUNCTION_TYPE.
11724 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
// OMP_TYPE: assign InitValue to the variable VarName.
11725#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
// OMP_ARRAY_TYPE: set <VarName>Ty to an array of ArraySize ElemTy elements and
// <VarName>PtrTy to a pointer type in DefaultTargetAS.
11726#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11727 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11728 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
// OMP_FUNCTION_TYPE: set VarName to the function type built from ReturnType,
// the variadic parameter list and IsVarArg, and <VarName>Ptr to a pointer type
// in ProgramAS.
11729#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11730 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11731 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
// OMP_STRUCT_TYPE: look the struct up by StructName, create it from the
// variadic element list (the condition guarding the creation is not visible
// here), and set <VarName>Ptr to a pointer type in DefaultTargetAS.
11732#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11733 T = StructType::getTypeByName(Ctx, StructName); \
11735 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11737 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11738#include "llvm/Frontend/OpenMP/OMPKinds.def"
11749 while (!Worklist.
empty()) {
11753 if (
BlockSet.insert(SuccBB).second)
11758std::unique_ptr<CodeExtractor>
11760 bool ArgsInZeroAddressSpace,
11762 return std::make_unique<CodeExtractor>(
11772 Suffix.
str(), ArgsInZeroAddressSpace);
11775std::unique_ptr<CodeExtractor> DeviceSharedMemOutlineInfo::createCodeExtractor(
11777 return std::make_unique<DeviceSharedMemCodeExtractor>(
11778 OMPBuilder, Blocks,
nullptr,
11786 OuterDeallocBBs.empty()
11789 Suffix.
str(), ArgsInZeroAddressSpace);
11799 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11811 Fn->
addFnAttr(
"uniform-work-group-size");
11812 Fn->
addFnAttr(Attribute::MustProgress);
11830 auto &&GetMDInt = [
this](
unsigned V) {
11837 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11838 auto &&TargetRegionMetadataEmitter =
11839 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11854 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11855 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11856 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11857 GetMDInt(E.getOrder())};
11860 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11869 auto &&DeviceGlobalVarMetadataEmitter =
11870 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11880 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11881 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11885 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11892 DeviceGlobalVarMetadataEmitter);
11894 for (
const auto &E : OrderedEntries) {
11895 assert(E.first &&
"All ordered entries must exist!");
11896 if (
const auto *CE =
11899 if (!CE->getID() || !CE->getAddress()) {
11903 if (!
M.getNamedValue(FnName))
11911 }
else if (
const auto *CE =
dyn_cast<
11920 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11922 if (!CE->getAddress()) {
11927 if (CE->getVarSize() == 0)
11931 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11932 (!
Config.isTargetDevice() && CE->getAddress())) &&
11933 "Declaret target link address is set.");
11934 if (
Config.isTargetDevice())
11936 if (!CE->getAddress()) {
11943 if (!CE->getAddress()) {
11956 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11960 OMPTargetGlobalVarEntryIndirectVTable))
11969 Flags, CE->getLinkage(), CE->getVarName());
11972 Flags, CE->getLinkage());
11983 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11989 Config.getRequiresFlags());
11999 OS <<
"_" <<
Count;
12004 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
12007 EntryInfo.
Line, NewCount);
12015 auto FileIDInfo = CallBack();
12019 FileID =
Status->getUniqueID().getFile();
12023 FileID =
hash_value(std::get<0>(FileIDInfo));
12027 std::get<1>(FileIDInfo));
12033 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12035 !(Remain & 1); Remain = Remain >> 1)
12053 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12055 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12062 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
12068 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
12069 Flags |= MemberOfFlag;
12075 bool IsDeclaration,
bool IsExternallyVisible,
12077 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
12078 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
12079 std::function<
Constant *()> GlobalInitializer,
12090 Config.hasRequiresUnifiedSharedMemory())) {
12095 if (!IsExternallyVisible)
12097 OS <<
"_decl_tgt_ref_ptr";
12100 Value *Ptr =
M.getNamedValue(PtrName);
12109 if (!
Config.isTargetDevice()) {
12110 if (GlobalInitializer)
12111 GV->setInitializer(GlobalInitializer());
12117 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
12118 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
12119 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
12131 bool IsDeclaration,
bool IsExternallyVisible,
12133 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
12134 std::vector<Triple> TargetTriple,
12135 std::function<
Constant *()> GlobalInitializer,
12139 (TargetTriple.empty() && !
Config.isTargetDevice()))
12150 !
Config.hasRequiresUnifiedSharedMemory()) {
12152 VarName = MangledName;
12155 if (!IsDeclaration)
12157 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
12160 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
12164 if (
Config.isTargetDevice() &&
12173 if (!
M.getNamedValue(RefName)) {
12177 GvAddrRef->setConstant(
true);
12179 GvAddrRef->setInitializer(Addr);
12180 GeneratedRefs.push_back(GvAddrRef);
12189 if (
Config.isTargetDevice()) {
12190 VarName = (Addr) ? Addr->
getName() :
"";
12194 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
12195 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
12196 LlvmPtrTy, GlobalInitializer, VariableLinkage);
12197 VarName = (Addr) ? Addr->
getName() :
"";
12199 VarSize =
M.getDataLayout().getPointerSize();
12218 auto &&GetMDInt = [MN](
unsigned Idx) {
12223 auto &&GetMDString = [MN](
unsigned Idx) {
12225 return V->getString();
12228 switch (GetMDInt(0)) {
12232 case OffloadEntriesInfoManager::OffloadEntryInfo::
12233 OffloadingEntryInfoTargetRegion: {
12243 case OffloadEntriesInfoManager::OffloadEntryInfo::
12244 OffloadingEntryInfoDeviceGlobalVar:
12257 if (HostFilePath.
empty())
12261 if (std::error_code Err = Buf.getError()) {
12263 "OpenMPIRBuilder: " +
12271 if (std::error_code Err =
M.getError()) {
12273 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
12287 "expected a valid insertion block for creating an iterator loop");
12297 Builder.getCurrentDebugLocation(),
"omp.it.cont");
12309 T->eraseFromParent();
12318 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
12320 "iterator bodygen must terminate the canonical body with an "
12321 "unconditional branch to the loop latch",
12345 for (
const auto &
ParamAttr : ParamAttrs) {
12388 return std::string(Out.
str());
12396 unsigned VecRegSize;
12398 ISADataTy ISAData[] = {
12417 for (
char Mask :
Masked) {
12418 for (
const ISADataTy &
Data : ISAData) {
12421 Out <<
"_ZGV" <<
Data.ISA << Mask;
12423 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
12437template <
typename T>
12440 StringRef MangledName,
bool OutputBecomesInput,
12444 Out << Prefix << ISA << LMask << VLEN;
12445 if (OutputBecomesInput)
12447 Out << ParSeq <<
'_' << MangledName;
12456 bool OutputBecomesInput,
12461 OutputBecomesInput, Fn);
12463 OutputBecomesInput, Fn);
12467 OutputBecomesInput, Fn);
12469 OutputBecomesInput, Fn);
12473 OutputBecomesInput, Fn);
12475 OutputBecomesInput, Fn);
12480 OutputBecomesInput, Fn);
12491 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
12492 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
12504 OutputBecomesInput, Fn);
12511 OutputBecomesInput, Fn);
12513 OutputBecomesInput, Fn);
12517 OutputBecomesInput, Fn);
12521 OutputBecomesInput, Fn);
12530 OutputBecomesInput, Fn);
12537 MangledName, OutputBecomesInput, Fn);
12539 MangledName, OutputBecomesInput, Fn);
12543 MangledName, OutputBecomesInput, Fn);
12547 MangledName, OutputBecomesInput, Fn);
12557 return OffloadEntriesTargetRegion.empty() &&
12558 OffloadEntriesDeviceGlobalVar.empty();
// Looks up the counter stored in OffloadEntriesTargetRegionCount under the key
// derived from EntryInfo by getTargetRegionEntryCountKey. The values returned
// for the found and not-found cases are not visible in this excerpt.
12561unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
12563 auto It = OffloadEntriesTargetRegionCount.find(
12564 getTargetRegionEntryCountKey(EntryInfo));
// No counter has been recorded for this key.
12565 if (It == OffloadEntriesTargetRegionCount.end())
// Stores EntryInfo.Count + 1 in OffloadEntriesTargetRegionCount under the key
// derived from EntryInfo by getTargetRegionEntryCountKey. Note that the new
// value is computed from EntryInfo.Count, not from the value currently held in
// the map.
12570void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
12572 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
12573 EntryInfo.
Count + 1;
12579 OffloadEntriesTargetRegion[EntryInfo] =
12582 ++OffloadingEntriesNum;
12588 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
12591 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12595 if (OMPBuilder->Config.isTargetDevice()) {
12600 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
12601 Entry.setAddress(Addr);
12603 Entry.setFlags(Flags);
12609 "Target region entry already registered!");
12611 OffloadEntriesTargetRegion[EntryInfo] = Entry;
12612 ++OffloadingEntriesNum;
12614 incrementTargetRegionEntryInfoCount(EntryInfo);
12621 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12623 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
12624 if (It == OffloadEntriesTargetRegion.end()) {
12628 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
12636 for (
const auto &It : OffloadEntriesTargetRegion) {
12637 Action(It.first, It.second);
12643 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
12644 ++OffloadingEntriesNum;
12650 if (OMPBuilder->Config.isTargetDevice()) {
12654 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12656 if (Entry.getVarSize() == 0) {
12657 Entry.setVarSize(VarSize);
12658 Entry.setLinkage(Linkage);
12662 Entry.setVarSize(VarSize);
12663 Entry.setLinkage(Linkage);
12664 Entry.setAddress(Addr);
12667 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12668 assert(Entry.isValid() && Entry.getFlags() == Flags &&
12669 "Entry not initialized!");
12670 if (Entry.getVarSize() == 0) {
12671 Entry.setVarSize(VarSize);
12672 Entry.setLinkage(Linkage);
12679 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
12680 Addr, VarSize, Flags, Linkage,
12683 OffloadEntriesDeviceGlobalVar.try_emplace(
12684 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
12685 ++OffloadingEntriesNum;
12692 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12693 Action(E.getKey(), E.getValue());
12700void CanonicalLoopInfo::collectControlBlocks(
12707 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12719void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12731void CanonicalLoopInfo::mapIndVar(
12741 for (
Use &U : OldIV->
uses()) {
12745 if (
User->getParent() == getCond())
12747 if (
User->getParent() == getLatch())
12753 Value *NewIV = Updater(OldIV);
12756 for (Use *U : ReplacableUses)
12777 "Preheader must terminate with unconditional branch");
12779 "Preheader must jump to header");
12783 "Header must terminate with unconditional branch");
12784 assert(Header->getSingleSuccessor() == Cond &&
12785 "Header must jump to exiting block");
12788 assert(Cond->getSinglePredecessor() == Header &&
12789 "Exiting block only reachable from header");
12792 "Exiting block must terminate with conditional branch");
12794 "Exiting block's first successor jump to the body");
12796 "Exiting block's second successor must exit the loop");
12800 "Body only reachable from exiting block");
12805 "Latch must terminate with unconditional branch");
12806 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12809 assert(Latch->getSinglePredecessor() !=
nullptr);
12814 "Exit block must terminate with unconditional branch");
12815 assert(Exit->getSingleSuccessor() == After &&
12816 "Exit block must jump to after block");
12820 "After block only reachable from exit block");
12824 assert(IndVar &&
"Canonical induction variable not found?");
12826 "Induction variable must be an integer");
12828 "Induction variable must be a PHI in the loop header");
12834 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12842 assert(TripCount &&
"Loop trip count not found?");
12844 "Trip count and induction variable must have the same type");
12848 "Exit condition must be a signed less-than comparison");
12850 "Exit condition must compare the induction variable");
12852 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
static Value * getOpcode(Value &V, Type &Ty, InstrumentationConfig &IConf, InstrumentorIRBuilderTy &IIRB)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static Function * createTargetParallelWrapper(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn)
Create wrapper function used to gather the outlined function's argument structure from a shared buffe...
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop, depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static bool isGenericKernel(Function &Fn)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static std::optional< omp::OMPTgtExecModeFlags > getTargetKernelExecMode(Function &Kernel)
Given a function, if it represents the entry point of a target kernel, this returns the execution mod...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static cl::opt< bool > UseDefaultMaxThreads("openmp-ir-builder-use-default-max-threads", cl::Hidden, cl::desc("Use a default max threads if none is provided."), cl::init(true))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performing unroll factor for CLI.
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const OpenMPIRBuilder::DependenciesInfo &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is a wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static bool hasGridValue(const Triple &T)
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static AtomicOrdering TransformReleaseAcquireRelease(AtomicOrdering AO)
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
bool hasTerminator() const LLVM_READONLY
Returns whether the block has a terminator.
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represent the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI ConstantFP * getZero(Type *Ty, bool Negative=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions. NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a declare target to.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
LLVM_ABI CallInst * createOMPAllocShared(const LocationDescription &Loc, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_alloc_shared.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI void initialize()
Initialize the internal state, this will put structure types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for: X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
LLVM_ABI InsertPointOrErrorTy createScope(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait)
Generator for 'omp scope'.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const DependenciesInfo &Dependencies={}, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> TargetBodyGenCallbackTy
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
bool HandleFPNegZero
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB, bool PreserveMemberOfFlags=false)
Emit the user-defined mapper function.
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, const DependenciesInfo &Dependencies={}, const AffinityData &Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={}, ArrayRef< BasicBlock * > DeallocBlocks={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy. Here is the logic: if (Cond) { Th...
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
void addOutlineInfo(std::unique_ptr< OutlineInfo > &&OI)
Add a new region that will be outlined later.
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number.
LLVM_ABI void emitTaskDependency(IRBuilderBase &Builder, Value *Entry, const DependData &Dep)
Store one kmp_depend_info entry at the given Entry pointer.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI CallInst * createOMPAlignedAlloc(const LocationDescription &Loc, Value *Align, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_align_alloc.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive splits and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPFreeShared(const LocationDescription &Loc, Value *Addr, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_free_shared.
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if a variable with the given name already exists) or creates an internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
SmallVector< std::unique_ptr< OutlineInfo >, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr, or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions whose constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< InsertPointTy > DeallocIPs)> TargetGenArgAccessorsCallbackTy
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const DependenciesInfo &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop after scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Represent a constant reference to a string, i.e.
std::string str() const
Get the contents as an std::string.
constexpr bool empty() const
Check if the string is empty.
constexpr size_t size() const
Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE=nullptr, const Loop *L=nullptr) const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return the user if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr)
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
@ OMP_TGT_EXEC_MODE_SPMD_NO_LOOP
@ OMP_TGT_EXEC_MODE_GENERIC
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iterable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
LLVM_ABI TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make an Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
auto filter_to_vector(ContainerTy &&C, PredicateFn &&Pred)
Filter a range to a SmallVector with the element types deduced.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
A struct to pack relevant information while generating atomic Ops.
A struct to pack the relevant information for an OpenMP depend clause.
omp::RTLDependenceKindTy DepKind
A struct to pack static and dynamic dependency information for a task.
SmallVector< DependData > Deps
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
BasicBlock * OuterAllocBB
virtual LLVM_ABI std::unique_ptr< CodeExtractor > createCodeExtractor(ArrayRef< BasicBlock * > Blocks, bool ArgsInZeroAddressSpace, Twine Suffix=Twine(""))
Create a CodeExtractor instance based on the information stored in this structure,...
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointers passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool StrictBlocksAndThreads
True if the kernel strictly requires the number of blocks and threads above to run.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static LLVM_ABI const Target * lookupTarget(const Triple &TheTriple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...