69#define DEBUG_TYPE "openmp-ir-builder"
76 cl::desc(
"Use optimistic attributes describing "
77 "'as-if' properties of runtime calls."),
81 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
82 cl::desc(
"Factor for the unroll threshold to account for code "
83 "simplifications still taking place"),
94 if (!IP1.isSet() || !IP2.isSet())
96 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
101 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
102 case OMPScheduleType::UnorderedStaticChunked:
103 case OMPScheduleType::UnorderedStatic:
104 case OMPScheduleType::UnorderedDynamicChunked:
105 case OMPScheduleType::UnorderedGuidedChunked:
106 case OMPScheduleType::UnorderedRuntime:
107 case OMPScheduleType::UnorderedAuto:
108 case OMPScheduleType::UnorderedTrapezoidal:
109 case OMPScheduleType::UnorderedGreedy:
110 case OMPScheduleType::UnorderedBalanced:
111 case OMPScheduleType::UnorderedGuidedIterativeChunked:
112 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
113 case OMPScheduleType::UnorderedSteal:
114 case OMPScheduleType::UnorderedStaticBalancedChunked:
115 case OMPScheduleType::UnorderedGuidedSimd:
116 case OMPScheduleType::UnorderedRuntimeSimd:
117 case OMPScheduleType::OrderedStaticChunked:
118 case OMPScheduleType::OrderedStatic:
119 case OMPScheduleType::OrderedDynamicChunked:
120 case OMPScheduleType::OrderedGuidedChunked:
121 case OMPScheduleType::OrderedRuntime:
122 case OMPScheduleType::OrderedAuto:
123 case OMPScheduleType::OrderdTrapezoidal:
124 case OMPScheduleType::NomergeUnorderedStaticChunked:
125 case OMPScheduleType::NomergeUnorderedStatic:
126 case OMPScheduleType::NomergeUnorderedDynamicChunked:
127 case OMPScheduleType::NomergeUnorderedGuidedChunked:
128 case OMPScheduleType::NomergeUnorderedRuntime:
129 case OMPScheduleType::NomergeUnorderedAuto:
130 case OMPScheduleType::NomergeUnorderedTrapezoidal:
131 case OMPScheduleType::NomergeUnorderedGreedy:
132 case OMPScheduleType::NomergeUnorderedBalanced:
133 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
134 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
135 case OMPScheduleType::NomergeUnorderedSteal:
136 case OMPScheduleType::NomergeOrderedStaticChunked:
137 case OMPScheduleType::NomergeOrderedStatic:
138 case OMPScheduleType::NomergeOrderedDynamicChunked:
139 case OMPScheduleType::NomergeOrderedGuidedChunked:
140 case OMPScheduleType::NomergeOrderedRuntime:
141 case OMPScheduleType::NomergeOrderedAuto:
142 case OMPScheduleType::NomergeOrderedTrapezoidal:
143 case OMPScheduleType::OrderedDistributeChunked:
144 case OMPScheduleType::OrderedDistribute:
152 SchedType & OMPScheduleType::MonotonicityMask;
153 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
165 Builder.restoreIP(IP);
173 return T.isAMDGPU() ||
T.isNVPTX() ||
T.isSPIRV();
179 Kernel->getFnAttribute(
"target-features").getValueAsString();
180 if (Features.
count(
"+wavefrontsize64"))
195 bool HasSimdModifier,
bool HasDistScheduleChunks) {
197 switch (ClauseKind) {
198 case OMP_SCHEDULE_Default:
199 case OMP_SCHEDULE_Static:
200 return HasChunks ? OMPScheduleType::BaseStaticChunked
201 : OMPScheduleType::BaseStatic;
202 case OMP_SCHEDULE_Dynamic:
203 return OMPScheduleType::BaseDynamicChunked;
204 case OMP_SCHEDULE_Guided:
205 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
206 : OMPScheduleType::BaseGuidedChunked;
207 case OMP_SCHEDULE_Auto:
209 case OMP_SCHEDULE_Runtime:
210 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
211 : OMPScheduleType::BaseRuntime;
212 case OMP_SCHEDULE_Distribute:
213 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
214 : OMPScheduleType::BaseDistribute;
222 bool HasOrderedClause) {
223 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
224 OMPScheduleType::None &&
225 "Must not have ordering nor monotonicity flags already set");
228 ? OMPScheduleType::ModifierOrdered
229 : OMPScheduleType::ModifierUnordered;
230 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
233 if (OrderingScheduleType ==
234 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
235 return OMPScheduleType::OrderedGuidedChunked;
236 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
237 OMPScheduleType::ModifierOrdered))
238 return OMPScheduleType::OrderedRuntime;
240 return OrderingScheduleType;
246 bool HasSimdModifier,
bool HasMonotonic,
247 bool HasNonmonotonic,
bool HasOrderedClause) {
248 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
249 OMPScheduleType::None &&
250 "Must not have monotonicity flags already set");
251 assert((!HasMonotonic || !HasNonmonotonic) &&
252 "Monotonic and Nonmonotonic are contradicting each other");
255 return ScheduleType | OMPScheduleType::ModifierMonotonic;
256 }
else if (HasNonmonotonic) {
257 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
267 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
268 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
274 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
282 bool HasSimdModifier,
bool HasMonotonicModifier,
283 bool HasNonmonotonicModifier,
bool HasOrderedClause,
284 bool HasDistScheduleChunks) {
286 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
290 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
291 HasNonmonotonicModifier, HasOrderedClause);
299static std::optional<omp::OMPTgtExecModeFlags>
304 if (
Call->getCalledFunction()->getName() ==
"__kmpc_target_init") {
305 TargetInitCall =
Call;
330 std::optional<omp::OMPTgtExecModeFlags> ExecMode =
342 if (
Instruction *Term = Source->getTerminatorOrNull()) {
351 NewBr->setDebugLoc(
DL);
356 assert(New->getFirstInsertionPt() == New->begin() &&
357 "Target BB must not have PHI nodes");
373 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
377 NewBr->setDebugLoc(
DL);
389 Builder.SetInsertPoint(Old);
393 Builder.SetCurrentDebugLocation(
DebugLoc);
403 New->replaceSuccessorsPhiUsesWith(Old, New);
412 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
414 Builder.SetInsertPoint(Builder.GetInsertBlock());
417 Builder.SetCurrentDebugLocation(
DebugLoc);
426 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
428 Builder.SetInsertPoint(Builder.GetInsertBlock());
431 Builder.SetCurrentDebugLocation(
DebugLoc);
448 const Twine &Name =
"",
bool AsPtr =
true,
449 bool Is64Bit =
false) {
450 Builder.restoreIP(OuterAllocaIP);
454 Builder.CreateAlloca(IntTy,
nullptr, Name +
".addr");
458 FakeVal = FakeValAddr;
460 FakeVal = Builder.CreateLoad(IntTy, FakeValAddr, Name +
".val");
465 Builder.restoreIP(InnerAllocaIP);
468 UseFakeVal = Builder.CreateLoad(IntTy, FakeVal, Name +
".use");
471 FakeVal, Is64Bit ? Builder.getInt64(10) : Builder.getInt32(10)));
484enum OpenMPOffloadingRequiresDirFlags {
486 OMP_REQ_UNDEFINED = 0x000,
488 OMP_REQ_NONE = 0x001,
490 OMP_REQ_REVERSE_OFFLOAD = 0x002,
492 OMP_REQ_UNIFIED_ADDRESS = 0x004,
494 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
496 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
503 DominatorTree *DT =
nullptr,
bool AggregateArgs =
false,
504 BlockFrequencyInfo *BFI =
nullptr,
505 BranchProbabilityInfo *BPI =
nullptr,
506 AssumptionCache *AC =
nullptr,
bool AllowVarArgs =
false,
507 bool AllowAlloca =
false,
508 BasicBlock *AllocationBlock =
nullptr,
510 std::string Suffix =
"",
bool ArgsInZeroAddressSpace =
false)
511 : CodeExtractor(BBs, DT, AggregateArgs, BFI, BPI, AC, AllowVarArgs,
512 AllowAlloca, AllocationBlock, DeallocationBlocks, Suffix,
513 ArgsInZeroAddressSpace),
514 OMPBuilder(OMPBuilder) {}
516 virtual ~OMPCodeExtractor() =
default;
519 OpenMPIRBuilder &OMPBuilder;
522class DeviceSharedMemCodeExtractor :
public OMPCodeExtractor {
524 using OMPCodeExtractor::OMPCodeExtractor;
525 virtual ~DeviceSharedMemCodeExtractor() =
default;
529 allocateVar(IRBuilder<>::InsertPoint AllocaIP,
Type *VarType,
530 const Twine &Name = Twine(
""),
531 AddrSpaceCastInst **CastedAlloc =
nullptr)
override {
532 return OMPBuilder.createOMPAllocShared(AllocaIP, VarType, Name);
535 virtual Instruction *deallocateVar(IRBuilder<>::InsertPoint DeallocIP,
537 return OMPBuilder.createOMPFreeShared(DeallocIP, Var, VarType);
544 OpenMPIRBuilder &OMPBuilder;
546 DeviceSharedMemOutlineInfo(OpenMPIRBuilder &OMPBuilder)
547 : OMPBuilder(OMPBuilder) {}
548 virtual ~DeviceSharedMemOutlineInfo() =
default;
550 virtual std::unique_ptr<CodeExtractor>
552 bool ArgsInZeroAddressSpace,
553 Twine Suffix = Twine(
""))
override;
559 : RequiresFlags(OMP_REQ_UNDEFINED) {}
563 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
564 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
567 RequiresFlags(OMP_REQ_UNDEFINED) {
568 if (HasRequiresReverseOffload)
569 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
570 if (HasRequiresUnifiedAddress)
571 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
572 if (HasRequiresUnifiedSharedMemory)
573 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
574 if (HasRequiresDynamicAllocators)
575 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
579 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
583 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
587 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
591 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
596 :
static_cast<int64_t
>(OMP_REQ_NONE);
601 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
603 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
608 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
610 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
615 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
617 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
622 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
624 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
637 constexpr size_t MaxDim = 3;
642 Value *DynCGroupMemFallbackFlag =
644 DynCGroupMemFallbackFlag =
Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
645 Value *Flags =
Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
651 Value *NumThreads3D =
682 auto FnAttrs = Attrs.getFnAttrs();
683 auto RetAttrs = Attrs.getRetAttrs();
685 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
690 bool Param =
true) ->
void {
691 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
692 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
693 if (HasSignExt || HasZeroExt) {
694 assert(AS.getNumAttributes() == 1 &&
695 "Currently not handling extension attr combined with others.");
697 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
700 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
707#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
708#include "llvm/Frontend/OpenMP/OMPKinds.def"
712#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
714 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
715 addAttrSet(RetAttrs, RetAttrSet, false); \
716 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
717 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
718 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
720#include "llvm/Frontend/OpenMP/OMPKinds.def"
734#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
736 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
738 Fn = M.getFunction(Str); \
740#include "llvm/Frontend/OpenMP/OMPKinds.def"
746#define OMP_RTL(Enum, Str, ...) \
748 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
750#include "llvm/Frontend/OpenMP/OMPKinds.def"
754 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
764 LLVMContext::MD_callback,
766 2, {-1, -1},
true)}));
779 assert(Fn &&
"Failed to create OpenMP runtime function");
790 Builder.SetInsertPoint(FiniBB);
802 FiniBB = OtherFiniBB;
804 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
812 auto EndIt = FiniBB->end();
813 if (FiniBB->size() >= 1)
814 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
819 FiniBB->replaceAllUsesWith(OtherFiniBB);
820 FiniBB->eraseFromParent();
821 FiniBB = OtherFiniBB;
828 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
851 for (
auto Inst =
Block->getReverseIterator()->begin();
852 Inst !=
Block->getReverseIterator()->end();) {
881 Block.getParent()->getEntryBlock().getTerminator()->getIterator();
902 DeferredOutlines.
push_back(std::move(OI));
906 ParallelRegionBlockSet.
clear();
908 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
918 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
919 std::unique_ptr<CodeExtractor> Extractor =
920 OI->createCodeExtractor(Blocks, ArgsInZeroAddressSpace,
".omp_par");
924 <<
" Exit: " << OI->ExitBB->getName() <<
"\n");
925 assert(Extractor->isEligible() &&
926 "Expected OpenMP outlining to be possible!");
928 for (
auto *V : OI->ExcludeArgsFromAggregate)
929 Extractor->excludeArgFromAggregate(V);
932 Extractor->extractCodeRegion(CEAC, OI->Inputs, OI->Outputs);
936 if (TargetCpuAttr.isStringAttribute())
939 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
940 if (TargetFeaturesAttr.isStringAttribute())
941 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
944 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
946 "OpenMP outlined functions should not return a value!");
951 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
958 assert(OI->EntryBB->getUniquePredecessor() == &ArtificialEntry);
965 "Expected instructions to add in the outlined region entry");
967 End = ArtificialEntry.
rend();
972 if (
I.isTerminator()) {
974 if (
Instruction *TI = OI->EntryBB->getTerminatorOrNull())
975 TI->adoptDbgRecords(&ArtificialEntry,
I.getIterator(),
false);
979 I.moveBeforePreserving(*OI->EntryBB,
980 OI->EntryBB->getFirstInsertionPt());
983 OI->EntryBB->moveBefore(&ArtificialEntry);
990 if (OI->PostOutlineCB)
991 OI->PostOutlineCB(*OutlinedFn);
993 if (OI->FixUpNonEntryAllocas)
1025 errs() <<
"Error of kind: " << Kind
1026 <<
" when emitting offload entries and metadata during "
1027 "OMPIRBuilder finalization \n";
1033 if (
Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
1034 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
1035 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
1036 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
1053 ConstantInt::get(I32Ty,
Value), Name);
1066 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
1070 if (UsedArray.
empty())
1077 GV->setSection(
"llvm.metadata");
1083 auto *Int8Ty =
Builder.getInt8Ty();
1086 ConstantInt::get(Int8Ty, Mode),
Twine(KernelName,
"_exec_mode"));
1094 unsigned Reserve2Flags) {
1096 LocFlags |= OMP_IDENT_FLAG_KMPC;
1103 ConstantInt::get(Int32,
uint32_t(LocFlags)),
1104 ConstantInt::get(Int32, Reserve2Flags),
1105 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
1107 size_t SrcLocStrArgIdx = 4;
1108 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
1112 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
1119 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
1120 if (
GV.getInitializer() == Initializer)
1125 M, OpenMPIRBuilder::Ident,
1128 M.getDataLayout().getDefaultGlobalsAddressSpace());
1140 SrcLocStrSize = LocStr.
size();
1149 if (
GV.isConstant() &&
GV.hasInitializer() &&
1150 GV.getInitializer() == Initializer)
1153 SrcLocStr =
Builder.CreateGlobalString(
1154 LocStr,
"",
M.getDataLayout().getDefaultGlobalsAddressSpace(),
1162 unsigned Line,
unsigned Column,
1168 Buffer.
append(FunctionName);
1170 Buffer.
append(std::to_string(Line));
1172 Buffer.
append(std::to_string(Column));
1180 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1191 !DIL->getFilename().empty() ? DIL->getFilename() :
M.getName();
1196 DIL->getColumn(), SrcLocStrSize);
1202 Loc.IP.getBlock()->getParent());
1208 "omp_global_thread_num");
1213 bool ForceSimpleCall,
bool CheckCancelFlag) {
1223 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1226 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1229 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1232 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1235 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1248 bool UseCancelBarrier =
1253 ? OMPRTL___kmpc_cancel_barrier
1254 : OMPRTL___kmpc_barrier),
1257 if (UseCancelBarrier && CheckCancelFlag)
1267 omp::Directive CanceledDirective) {
1272 auto *UI =
Builder.CreateUnreachable();
1280 Builder.SetInsertPoint(ElseTI);
1281 auto ElseIP =
Builder.saveIP();
1289 Builder.SetInsertPoint(ThenTI);
1291 Value *CancelKind =
nullptr;
1292 switch (CanceledDirective) {
1293#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1294 case DirectiveEnum: \
1295 CancelKind = Builder.getInt32(Value); \
1297#include "llvm/Frontend/OpenMP/OMPKinds.def"
1314 Builder.SetInsertPoint(UI->getParent());
1315 UI->eraseFromParent();
1322 omp::Directive CanceledDirective) {
1327 auto *UI =
Builder.CreateUnreachable();
1330 Value *CancelKind =
nullptr;
1331 switch (CanceledDirective) {
1332#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1333 case DirectiveEnum: \
1334 CancelKind = Builder.getInt32(Value); \
1336#include "llvm/Frontend/OpenMP/OMPKinds.def"
1353 Builder.SetInsertPoint(UI->getParent());
1354 UI->eraseFromParent();
1367 auto *KernelArgsPtr =
1368 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1373 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1376 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1380 NumThreads, HostPtr, KernelArgsPtr};
1407 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1411 Value *Return =
nullptr;
1431 Builder, AllocaIP, Return, RTLoc, DeviceID, Args.NumTeams.front(),
1432 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1439 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1441 auto CurFn =
Builder.GetInsertBlock()->getParent();
1448 emitBlock(OffloadContBlock, CurFn,
true);
1453 Value *CancelFlag, omp::Directive CanceledDirective) {
1455 "Unexpected cancellation!");
1475 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1484 Builder.SetInsertPoint(CancellationBlock);
1485 Builder.CreateBr(*FiniBBOrErr);
1488 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1500 size_t NumArgs = OutlinedFn.
arg_size();
1501 assert((NumArgs == 2 || NumArgs == 3) &&
1502 "expected a 2-3 argument parallel outlined function");
1503 bool UseArgStruct = NumArgs == 3;
1508 {Builder.getInt16Ty(), Builder.getInt32Ty()},
1512 OutlinedFn.
getName() +
".wrapper", OMPIRBuilder->
M);
1514 WrapperFn->addParamAttr(0, Attribute::NoUndef);
1515 WrapperFn->addParamAttr(0, Attribute::ZExt);
1516 WrapperFn->addParamAttr(1, Attribute::NoUndef);
1520 Builder.SetInsertPoint(EntryBB);
1523 Value *AddrAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1525 AddrAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1526 AddrAlloca, Builder.getPtrTy(0),
1527 AddrAlloca->
getName() +
".ascast");
1529 Value *ZeroAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
1531 ZeroAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1532 ZeroAlloca, Builder.getPtrTy(0),
1533 ZeroAlloca->
getName() +
".ascast");
1535 Value *ArgsAlloca =
nullptr;
1537 ArgsAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
1538 nullptr,
"global_args");
1539 ArgsAlloca = Builder.CreatePointerBitCastOrAddrSpaceCast(
1540 ArgsAlloca, Builder.getPtrTy(0),
1541 ArgsAlloca->
getName() +
".ascast");
1545 Builder.CreateStore(WrapperFn->getArg(1), AddrAlloca);
1546 Builder.CreateStore(Builder.getInt32(0), ZeroAlloca);
1550 llvm::omp::RuntimeFunction::OMPRTL___kmpc_get_shared_variables),
1558 Value *StructArg = Builder.CreateLoad(Builder.getPtrTy(), ArgsAlloca);
1559 StructArg = Builder.CreateInBoundsGEP(Builder.getPtrTy(), StructArg,
1560 {Builder.getInt64(0)});
1561 StructArg = Builder.CreateLoad(Builder.getPtrTy(), StructArg,
"structArg");
1562 Args.push_back(StructArg);
1566 Builder.CreateCall(&OutlinedFn, Args);
1567 Builder.CreateRetVoid();
1582 "Expected at least tid and bounded tid as arguments");
1583 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1591 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1594 assert(CI &&
"Expected call instruction to outlined function");
1595 CI->
getParent()->setName(
"omp_parallel");
1597 Builder.SetInsertPoint(CI);
1598 Type *PtrTy = OMPIRBuilder->VoidPtr;
1601 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1605 Value *Args = ArgsAlloca;
1609 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1610 Builder.restoreIP(CurrentIP);
1613 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1615 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1617 Builder.CreateStore(V, StoreAddress);
1621 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1622 : Builder.getInt32(1);
1623 Value *NumThreadsArg =
1624 NumThreads ? Builder.CreateZExtOrTrunc(NumThreads, OMPIRBuilder->Int32)
1625 : Builder.getInt32(-1);
1635 Value *Parallel60CallArgs[] = {
1640 Builder.getInt32(-1),
1644 Builder.getInt64(NumCapturedVars),
1645 Builder.getInt32(0)};
1653 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1656 Builder.SetInsertPoint(PrivTID);
1658 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1665 I->eraseFromParent();
1688 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1696 F->addMetadata(LLVMContext::MD_callback,
1705 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1708 "Expected at least tid and bounded tid as arguments");
1709 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1712 CI->
getParent()->setName(
"omp_parallel");
1713 Builder.SetInsertPoint(CI);
1716 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1720 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1722 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1729 auto PtrTy = OMPIRBuilder->VoidPtr;
1730 if (IfCondition && NumCapturedVars == 0) {
1738 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1741 Builder.SetInsertPoint(PrivTID);
1743 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1750 I->eraseFromParent();
1758 Value *NumThreads, omp::ProcBindKind ProcBind,
bool IsCancellable) {
1767 const bool NeedThreadID = NumThreads ||
Config.isTargetDevice() ||
1768 (ProcBind != OMP_PROC_BIND_default);
1775 bool ArgsInZeroAddressSpace =
Config.isTargetDevice();
1779 if (NumThreads && !
Config.isTargetDevice()) {
1782 Builder.CreateIntCast(NumThreads, Int32,
false)};
1787 if (ProcBind != OMP_PROC_BIND_default) {
1791 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1813 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1816 if (ArgsInZeroAddressSpace &&
M.getDataLayout().getAllocaAddrSpace() != 0) {
1819 TIDAddrAlloca, PointerType ::get(
M.getContext(), 0),
"tid.addr.ascast");
1823 PointerType ::get(
M.getContext(), 0),
1824 "zero.addr.ascast");
1848 if (IP.getBlock()->end() == IP.getPoint()) {
1854 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1855 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1856 "Unexpected insertion point for finalization call!");
1868 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1874 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1892 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1895 assert(BodyGenCB &&
"Expected body generation callback!");
1897 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP, PRegExitBB))
1900 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1904 bool UsesDeviceSharedMemory =
1906 std::unique_ptr<OutlineInfo> OI =
1907 UsesDeviceSharedMemory
1908 ? std::make_unique<DeviceSharedMemOutlineInfo>(*
this)
1909 : std::make_unique<OutlineInfo>();
1911 if (
Config.isTargetDevice()) {
1913 OI->PostOutlineCB = [=, ToBeDeletedVec =
1914 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1916 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1917 ThreadID, ToBeDeletedVec);
1921 OI->PostOutlineCB = [=, ToBeDeletedVec =
1922 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1924 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1928 OI->FixUpNonEntryAllocas =
true;
1929 OI->OuterAllocBB = OuterAllocaBlock;
1930 OI->EntryBB = PRegEntryBB;
1931 OI->ExitBB = PRegExitBB;
1932 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
1933 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
1937 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
1949 ".omp_par", ArgsInZeroAddressSpace);
1954 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1956 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1961 return GV->getValueType() == OpenMPIRBuilder::Ident;
1966 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1972 if (&V == TIDAddr || &V == ZeroAddr) {
1973 OI->ExcludeArgsFromAggregate.push_back(&V);
1978 for (
Use &U : V.uses())
1980 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1990 if (!V.getType()->isPointerTy()) {
1994 Builder.restoreIP(OuterAllocIP);
1996 if (UsesDeviceSharedMemory) {
1999 V.getName() +
".reloaded");
2000 for (
BasicBlock *DeallocBlock : OuterDeallocBlocks)
2002 InsertPointTy(DeallocBlock, DeallocBlock->getFirstInsertionPt()),
2005 Ptr =
Builder.CreateAlloca(V.getType(),
nullptr,
2006 V.getName() +
".reloaded");
2011 Builder.SetInsertPoint(InsertBB,
2016 Builder.restoreIP(InnerAllocaIP);
2017 Inner =
Builder.CreateLoad(V.getType(), Ptr);
2020 Value *ReplacementValue =
nullptr;
2023 ReplacementValue = PrivTID;
2026 PrivCB(InnerAllocaIP,
Builder.saveIP(), V, *Inner, ReplacementValue);
2034 assert(ReplacementValue &&
2035 "Expected copy/create callback to set replacement value!");
2036 if (ReplacementValue == &V)
2041 UPtr->set(ReplacementValue);
2066 for (
Value *Output : Outputs)
2070 "OpenMP outlining should not produce live-out values!");
2072 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
2074 for (
auto *BB : Blocks)
2075 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
2083 assert(FiniInfo.DK == OMPD_parallel &&
2084 "Unexpected finalization stack state!");
2095 Builder.CreateBr(*FiniBBOrErr);
2099 Term->eraseFromParent();
2105 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
2106 UI->eraseFromParent();
2169 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
2171 Builder.CreateStore(DepValPtr, Addr);
2174 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Len));
2176 ConstantInt::get(SizeTy,
2181 DependInfo, Entry,
static_cast<unsigned int>(RTLDependInfoFields::Flags));
2183 static_cast<unsigned int>(Dep.
DepKind)),
2196 if (Dependencies.
empty())
2216 Type *DependInfo = OMPBuilder.DependInfo;
2218 Value *DepArray =
nullptr;
2220 Builder.SetInsertPoint(
2224 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
2226 Builder.restoreIP(OldIP);
2228 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
2230 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
2237Expected<Value *> OpenMPIRBuilder::createTaskDuplicationFunction(
2239 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2254 Builder.getVoidTy(), {VoidPtrTy, VoidPtrTy, Builder.getInt32Ty()},
2258 "omp_taskloop_dup",
M);
2261 Value *LastprivateFlagArg = DupFunction->
getArg(2);
2262 DestTaskArg->
setName(
"dest_task");
2263 SrcTaskArg->
setName(
"src_task");
2264 LastprivateFlagArg->
setName(
"lastprivate_flag");
2266 IRBuilderBase::InsertPointGuard Guard(
Builder);
2270 auto GetTaskContextPtrFromArg = [&](
Value *Arg) ->
Value * {
2271 Type *TaskWithPrivatesTy =
2274 TaskWithPrivatesTy, Arg, {
Builder.getInt32(0),
Builder.getInt32(1)});
2276 PrivatesTy, TaskPrivates,
2281 Value *DestTaskContextPtr = GetTaskContextPtrFromArg(DestTaskArg);
2282 Value *SrcTaskContextPtr = GetTaskContextPtrFromArg(SrcTaskArg);
2284 DestTaskContextPtr->
setName(
"destPtr");
2285 SrcTaskContextPtr->
setName(
"srcPtr");
2290 Expected<IRBuilderBase::InsertPoint> AfterIPOrError =
2291 DupCB(AllocaIP, CodeGenIP, DestTaskContextPtr, SrcTaskContextPtr);
2292 if (!AfterIPOrError)
2294 Builder.restoreIP(*AfterIPOrError);
2304 llvm::function_ref<llvm::Expected<llvm::CanonicalLoopInfo *>()> LoopInfo,
2306 Value *GrainSize,
bool NoGroup,
int Sched,
Value *Final,
bool Mergeable,
2308 Value *TaskContextStructPtrVal) {
2313 uint32_t SrcLocStrSize;
2329 if (
Error Err = BodyGenCB(TaskloopAllocaIP, TaskloopBodyIP, TaskloopExitBB))
2332 llvm::Expected<llvm::CanonicalLoopInfo *> result = LoopInfo();
2337 llvm::CanonicalLoopInfo *CLI = result.
get();
2338 auto OI = std::make_unique<OutlineInfo>();
2339 OI->EntryBB = TaskloopAllocaBB;
2340 OI->OuterAllocBB = AllocaIP.getBlock();
2341 OI->ExitBB = TaskloopExitBB;
2342 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2343 copy(DeallocBlocks, OI->OuterDeallocBBs.end());
2349 Builder, AllocaIP, ToBeDeleted, TaskloopAllocaIP,
"global.tid",
false));
2351 TaskloopAllocaIP,
"lb",
false,
true);
2353 TaskloopAllocaIP,
"ub",
false,
true);
2355 TaskloopAllocaIP,
"step",
false,
true);
2358 OI->Inputs.insert(FakeLB);
2359 OI->Inputs.insert(FakeUB);
2360 OI->Inputs.insert(FakeStep);
2361 if (TaskContextStructPtrVal)
2362 OI->Inputs.insert(TaskContextStructPtrVal);
2363 assert(((TaskContextStructPtrVal && DupCB) ||
2364 (!TaskContextStructPtrVal && !DupCB)) &&
2365 "Task context struct ptr and duplication callback must be both set "
2371 unsigned ProgramAddressSpace =
M.getDataLayout().getProgramAddressSpace();
2375 {FakeLB->getType(), FakeUB->getType(), FakeStep->getType(), PointerTy});
2376 Expected<Value *> TaskDupFnOrErr = createTaskDuplicationFunction(
2379 if (!TaskDupFnOrErr) {
2382 Value *TaskDupFn = *TaskDupFnOrErr;
2384 OI->PostOutlineCB = [
this, Ident, LBVal, UBVal, StepVal, Untied,
2385 TaskloopAllocaBB, CLI, Loc, TaskDupFn, ToBeDeleted,
2386 IfCond, GrainSize, NoGroup, Sched, FakeLB, FakeUB,
2387 FakeStep, FakeSharedsTy, Final, Mergeable, Priority,
2388 NumOfCollapseLoops](
Function &OutlinedFn)
mutable {
2390 assert(OutlinedFn.hasOneUse() &&
2391 "there must be a single user for the outlined function");
2398 Value *CastedLBVal =
2399 Builder.CreateIntCast(LBVal,
Builder.getInt64Ty(),
true,
"lb64");
2400 Value *CastedUBVal =
2401 Builder.CreateIntCast(UBVal,
Builder.getInt64Ty(),
true,
"ub64");
2402 Value *CastedStepVal =
2403 Builder.CreateIntCast(StepVal,
Builder.getInt64Ty(),
true,
"step64");
2405 Builder.SetInsertPoint(StaleCI);
2418 Builder.CreateCall(TaskgroupFn, {Ident, ThreadID});
2439 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2441 AllocaInst *ArgStructAlloca =
2443 assert(ArgStructAlloca &&
2444 "Unable to find the alloca instruction corresponding to arguments "
2445 "for extracted function");
2446 std::optional<TypeSize> ArgAllocSize =
2449 "Unable to determine size of arguments for extracted function");
2450 Value *SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2455 CallInst *TaskData =
Builder.CreateCall(
2456 TaskAllocFn, {Ident, ThreadID,
Flags,
2457 TaskSize, SharedsSize,
2462 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2463 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2468 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(0)});
2471 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(1)});
2474 FakeSharedsTy, TaskShareds, {
Builder.getInt32(0),
Builder.getInt32(2)});
2480 IfCond ?
Builder.CreateIntCast(IfCond,
Builder.getInt32Ty(),
true)
2486 Value *GrainSizeVal =
2487 GrainSize ?
Builder.CreateIntCast(GrainSize,
Builder.getInt64Ty(),
true)
2489 Value *TaskDup = TaskDupFn;
2491 Value *
Args[] = {Ident, ThreadID, TaskData, IfCondVal, Lb, Ub,
2492 Loadstep, NoGroupVal, SchedVal, GrainSizeVal, TaskDup};
2497 Builder.CreateCall(TaskloopFn, Args);
2504 Builder.CreateCall(EndTaskgroupFn, {Ident, ThreadID});
2509 Builder.SetInsertPoint(TaskloopAllocaBB, TaskloopAllocaBB->begin());
2511 LoadInst *SharedsOutlined =
2512 Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2513 OutlinedFn.getArg(1)->replaceUsesWithIf(
2515 [SharedsOutlined](Use &U) {
return U.getUser() != SharedsOutlined; });
2518 Type *IVTy =
IV->getType();
2524 Value *TaskLB =
nullptr;
2525 Value *TaskUB =
nullptr;
2526 Value *TaskStep =
nullptr;
2527 Value *LoadTaskLB =
nullptr;
2528 Value *LoadTaskUB =
nullptr;
2529 Value *LoadTaskStep =
nullptr;
2530 for (Instruction &
I : *TaskloopAllocaBB) {
2531 if (
I.getOpcode() == Instruction::GetElementPtr) {
2534 switch (CI->getZExtValue()) {
2546 }
else if (
I.getOpcode() == Instruction::Load) {
2548 if (
Load.getPointerOperand() == TaskLB) {
2549 assert(TaskLB !=
nullptr &&
"Expected value for TaskLB");
2551 }
else if (
Load.getPointerOperand() == TaskUB) {
2552 assert(TaskUB !=
nullptr &&
"Expected value for TaskUB");
2554 }
else if (
Load.getPointerOperand() == TaskStep) {
2555 assert(TaskStep !=
nullptr &&
"Expected value for TaskStep");
2561 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
2563 assert(LoadTaskLB !=
nullptr &&
"Expected value for LoadTaskLB");
2564 assert(LoadTaskUB !=
nullptr &&
"Expected value for LoadTaskUB");
2565 assert(LoadTaskStep !=
nullptr &&
"Expected value for LoadTaskStep");
2567 Builder.CreateSub(LoadTaskUB, LoadTaskLB), LoadTaskStep);
2568 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One,
"trip_cnt");
2569 Value *CastedTripCount =
Builder.CreateIntCast(TripCount, IVTy,
true);
2570 Value *CastedTaskLB =
Builder.CreateIntCast(LoadTaskLB, IVTy,
true);
2572 CLI->setTripCount(CastedTripCount);
2574 Builder.SetInsertPoint(CLI->getBody(),
2575 CLI->getBody()->getFirstInsertionPt());
2577 if (NumOfCollapseLoops > 1) {
2583 Builder.CreateSub(CastedTaskLB, ConstantInt::get(IVTy, 1)));
2586 for (
auto IVUse = CLI->getIndVar()->uses().begin();
2587 IVUse != CLI->getIndVar()->uses().end(); IVUse++) {
2588 User *IVUser = IVUse->getUser();
2590 if (
Op->getOpcode() == Instruction::URem ||
2591 Op->getOpcode() == Instruction::UDiv) {
2596 for (User *User : UsersToReplace) {
2597 User->replaceUsesOfWith(CLI->getIndVar(), IVPlusTaskLB);
2614 assert(CLI->getIndVar()->getNumUses() == 3 &&
2615 "Canonical loop should have exactly three uses of the ind var");
2616 for (User *IVUser : CLI->getIndVar()->users()) {
2618 if (
Mul->getOpcode() == Instruction::Mul) {
2619 for (User *MulUser :
Mul->users()) {
2621 if (
Add->getOpcode() == Instruction::Add) {
2622 Add->setOperand(1, CastedTaskLB);
2631 FakeLB->replaceAllUsesWith(CastedLBVal);
2632 FakeUB->replaceAllUsesWith(CastedUBVal);
2633 FakeStep->replaceAllUsesWith(CastedStepVal);
2635 I->eraseFromParent();
2640 Builder.SetInsertPoint(TaskloopExitBB, TaskloopExitBB->
begin());
2646 M.getContext(),
M.getDataLayout().getPointerSizeInBits());
2656 bool Mergeable,
Value *EventHandle,
Value *Priority) {
2688 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP, TaskExitBB))
2691 auto OI = std::make_unique<OutlineInfo>();
2692 OI->EntryBB = TaskAllocaBB;
2693 OI->OuterAllocBB = AllocaIP.
getBlock();
2694 OI->ExitBB = TaskExitBB;
2695 OI->OuterDeallocBBs.reserve(DeallocBlocks.
size());
2696 copy(DeallocBlocks, OI->OuterDeallocBBs.
end());
2701 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2703 OI->PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2704 Affinities, Mergeable, Priority, EventHandle,
2706 ToBeDeleted](
Function &OutlinedFn)
mutable {
2708 assert(OutlinedFn.hasOneUse() &&
2709 "there must be a single user for the outlined function");
2714 bool HasShareds = StaleCI->
arg_size() > 1;
2715 Builder.SetInsertPoint(StaleCI);
2740 bool UseMergedIf0Path = ConstIfCondition && ConstIfCondition->isZero();
2744 Flags =
Builder.CreateOr(FinalFlag, Flags);
2747 if (Mergeable || UseMergedIf0Path)
2759 divideCeil(
M.getDataLayout().getTypeSizeInBits(Task), 8));
2768 assert(ArgStructAlloca &&
2769 "Unable to find the alloca instruction corresponding to arguments "
2770 "for extracted function");
2771 std::optional<TypeSize> ArgAllocSize =
2774 "Unable to determine size of arguments for extracted function");
2775 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
2781 TaskAllocFn, {Ident, ThreadID, Flags,
2782 TaskSize, SharedsSize,
2785 if (Affinities.
Count && Affinities.
Info) {
2787 OMPRTL___kmpc_omp_reg_task_with_affinity);
2798 OMPRTL___kmpc_task_allow_completion_event);
2802 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2804 EventVal =
Builder.CreatePtrToInt(EventVal,
Builder.getInt64Ty());
2805 Builder.CreateStore(EventVal, EventHandleAddr);
2811 Value *TaskShareds =
Builder.CreateLoad(VoidPtr, TaskData);
2812 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2830 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {Zero, Zero});
2833 VoidPtr, VoidPtr,
Builder.getInt32Ty(), VoidPtr, VoidPtr);
2835 TaskStructType, TaskGEP, {Zero, ConstantInt::get(
Int32Ty, 4)});
2838 Value *CmplrData =
Builder.CreateInBoundsGEP(CmplrStructType,
2839 PriorityData, {Zero, Zero});
2840 Builder.CreateStore(Priority, CmplrData);
2843 Value *DepArray =
nullptr;
2844 Value *NumDeps =
nullptr;
2847 NumDeps = Dependencies.
NumDeps;
2848 }
else if (!Dependencies.
Deps.empty()) {
2850 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
2870 if (IfCondition && !UseMergedIf0Path) {
2875 Builder.GetInsertPoint()->getParent()->getTerminator();
2876 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2877 Builder.SetInsertPoint(IfTerminator);
2880 Builder.SetInsertPoint(ElseTI);
2887 {Ident, ThreadID, NumDeps, DepArray,
2888 ConstantInt::get(
Builder.getInt32Ty(), 0),
2903 Builder.SetInsertPoint(ThenTI);
2911 {Ident, ThreadID, TaskData, NumDeps, DepArray,
2912 ConstantInt::get(
Builder.getInt32Ty(), 0),
2923 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->
begin());
2925 LoadInst *Shareds =
Builder.CreateLoad(VoidPtr, OutlinedFn.getArg(1));
2926 OutlinedFn.getArg(1)->replaceUsesWithIf(
2927 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2931 I->eraseFromParent();
2935 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2957 if (
Error Err = BodyGenCB(AllocaIP,
Builder.saveIP(), DeallocBlocks))
2960 Builder.SetInsertPoint(TaskgroupExitBB);
3003 unsigned CaseNumber = 0;
3004 for (
auto SectionCB : SectionCBs) {
3006 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
3008 Builder.SetInsertPoint(CaseBB);
3012 {CaseEndBr->getParent(), CaseEndBr->getIterator()}, {}))
3023 Value *LB = ConstantInt::get(I32Ty, 0);
3024 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
3025 Value *ST = ConstantInt::get(I32Ty, 1);
3027 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
3032 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
3033 WorksharingLoopType::ForStaticLoop, !IsNowait);
3039 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
3043 assert(FiniInfo.DK == OMPD_sections &&
3044 "Unexpected finalization stack state!");
3045 if (
Error Err = FiniInfo.mergeFiniBB(
Builder, LoopFini))
3059 if (IP.getBlock()->end() != IP.getPoint())
3070 auto *CaseBB =
Loc.IP.getBlock();
3071 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
3072 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
3078 Directive OMPD = Directive::OMPD_sections;
3081 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
3092Value *OpenMPIRBuilder::getGPUThreadID() {
3095 OMPRTL___kmpc_get_hardware_thread_id_in_block),
3099Value *OpenMPIRBuilder::getGPUWarpSize() {
3104Value *OpenMPIRBuilder::getNVPTXWarpID() {
3105 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3106 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
3109Value *OpenMPIRBuilder::getNVPTXLaneID() {
3110 unsigned LaneIDBits =
Log2_32(
Config.getGridValue().GV_Warp_Size);
3111 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
3112 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
3113 return Builder.CreateAnd(getGPUThreadID(),
Builder.getInt32(LaneIDMask),
3120 uint64_t FromSize =
M.getDataLayout().getTypeStoreSize(FromType);
3121 uint64_t ToSize =
M.getDataLayout().getTypeStoreSize(ToType);
3122 assert(FromSize > 0 &&
"From size must be greater than zero");
3123 assert(ToSize > 0 &&
"To size must be greater than zero");
3124 if (FromType == ToType)
3126 if (FromSize == ToSize)
3127 return Builder.CreateBitCast(From, ToType);
3129 return Builder.CreateIntCast(From, ToType,
true);
3135 Value *ValCastItem =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3136 CastItem,
Builder.getPtrTy(0));
3137 Builder.CreateStore(From, ValCastItem);
3138 return Builder.CreateLoad(ToType, CastItem);
3145 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElementType);
3146 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
3150 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
3152 Builder.CreateIntCast(getGPUWarpSize(),
Builder.getInt16Ty(),
true);
3154 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
3155 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
3156 Value *WarpSizeCast =
3158 Value *ShuffleCall =
3160 return castValueToType(AllocaIP, ShuffleCall, CastTy);
3167 uint64_t
Size =
M.getDataLayout().getTypeStoreSize(ElemType);
3179 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3180 Value *ElemPtr = DstAddr;
3181 Value *Ptr = SrcAddr;
3182 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
3186 Ptr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3189 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
3190 ElemPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3194 if ((
Size / IntSize) > 1) {
3195 Value *PtrEnd =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3196 SrcAddrGEP,
Builder.getPtrTy());
3213 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr,
Builder.getPtrTy()));
3215 Builder.CreateICmpSGT(PtrDiff,
Builder.getInt64(IntSize - 1)), ThenBB,
3218 Value *Res = createRuntimeShuffleFunction(
3221 IntType, Ptr,
M.getDataLayout().getPrefTypeAlign(ElemType)),
3223 Builder.CreateAlignedStore(Res, ElemPtr,
3224 M.getDataLayout().getPrefTypeAlign(ElemType));
3226 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3227 Value *LocalElemPtr =
3228 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3234 Value *Res = createRuntimeShuffleFunction(
3235 AllocaIP,
Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
3238 Res =
Builder.CreateTrunc(Res, ElemType);
3239 Builder.CreateStore(Res, ElemPtr);
3240 Ptr =
Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
3242 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
3248Error OpenMPIRBuilder::emitReductionListCopy(
3253 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3254 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
3258 for (
auto En :
enumerate(ReductionInfos)) {
3260 Value *SrcElementAddr =
nullptr;
3261 AllocaInst *DestAlloca =
nullptr;
3262 Value *DestElementAddr =
nullptr;
3263 Value *DestElementPtrAddr =
nullptr;
3265 bool ShuffleInElement =
false;
3268 bool UpdateDestListPtr =
false;
3272 ReductionArrayTy, SrcBase,
3273 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3274 SrcElementAddr =
Builder.CreateLoad(
Builder.getPtrTy(), SrcElementPtrAddr);
3278 DestElementPtrAddr =
Builder.CreateInBoundsGEP(
3279 ReductionArrayTy, DestBase,
3280 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3281 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
3287 Type *DestAllocaType =
3288 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
3289 DestAlloca =
Builder.CreateAlloca(DestAllocaType,
nullptr,
3290 ".omp.reduction.element");
3292 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
3293 DestElementAddr = DestAlloca;
3296 DestElementAddr->
getName() +
".ascast");
3298 ShuffleInElement =
true;
3299 UpdateDestListPtr =
true;
3311 if (ShuffleInElement) {
3312 Type *ShuffleType = RI.ElementType;
3313 Value *ShuffleSrcAddr = SrcElementAddr;
3314 Value *ShuffleDestAddr = DestElementAddr;
3315 AllocaInst *LocalStorage =
nullptr;
3318 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
3319 assert(RI.ByRefAllocatedType &&
3320 "Expected by-ref allocated type to be set");
3325 ShuffleType = RI.ByRefElementType;
3327 if (RI.DataPtrPtrGen) {
3330 Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
3333 return GenResult.takeError();
3342 LocalStorage =
Builder.CreateAlloca(ShuffleType);
3344 ShuffleDestAddr = LocalStorage;
3349 ShuffleDestAddr = DestElementAddr;
3353 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
3354 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
3356 if (IsByRefElem && RI.DataPtrPtrGen) {
3358 Value *DestDescriptorAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3359 DestAlloca,
Builder.getPtrTy(),
".ascast");
3362 DestDescriptorAddr, LocalStorage, SrcElementAddr,
3363 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
3366 return GenResult.takeError();
3369 switch (RI.EvaluationKind) {
3371 Value *Elem =
Builder.CreateLoad(RI.ElementType, SrcElementAddr);
3373 Builder.CreateStore(Elem, DestElementAddr);
3377 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3378 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
3380 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3382 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
3384 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3386 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3387 RI.ElementType, DestElementAddr, 0, 0,
".realp");
3388 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3389 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
3390 Builder.CreateStore(SrcReal, DestRealPtr);
3391 Builder.CreateStore(SrcImg, DestImgPtr);
3396 M.getDataLayout().getTypeStoreSize(RI.ElementType));
3398 DestElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3399 SrcElementAddr,
M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3411 if (UpdateDestListPtr) {
3412 Value *CastDestAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3413 DestElementAddr,
Builder.getPtrTy(),
3414 DestElementAddr->
getName() +
".ascast");
3415 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
3422Expected<Function *> OpenMPIRBuilder::emitInterWarpCopyFunction(
3426 LLVMContext &Ctx =
M.getContext();
3428 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
3432 "_omp_reduction_inter_warp_copy_func", &
M);
3438 Builder.SetInsertPoint(EntryBB);
3455 StringRef TransferMediumName =
3456 "__openmp_nvptx_data_transfer_temporary_storage";
3457 GlobalVariable *TransferMedium =
M.getGlobalVariable(TransferMediumName);
3458 unsigned WarpSize =
Config.getGridValue().GV_Warp_Size;
3460 if (!TransferMedium) {
3461 TransferMedium =
new GlobalVariable(
3469 Value *GPUThreadID = getGPUThreadID();
3471 Value *LaneID = getNVPTXLaneID();
3473 Value *WarpID = getNVPTXWarpID();
3477 Builder.GetInsertBlock()->getFirstInsertionPt());
3481 AllocaInst *ReduceListAlloca =
Builder.CreateAlloca(
3482 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
3483 AllocaInst *NumWarpsAlloca =
3484 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
3485 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3486 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
3487 Value *NumWarpsAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3488 NumWarpsAlloca,
Builder.getPtrTy(0),
3489 NumWarpsAlloca->
getName() +
".ascast");
3490 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3491 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
3500 for (
auto En :
enumerate(ReductionInfos)) {
3506 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3507 unsigned RealTySize =
M.getDataLayout().getTypeAllocSize(
3508 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
3509 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
3512 unsigned NumIters = RealTySize / TySize;
3515 Value *Cnt =
nullptr;
3516 Value *CntAddr =
nullptr;
3523 Builder.CreateAlloca(
Builder.getInt32Ty(),
nullptr,
".cnt.addr");
3525 CntAddr =
Builder.CreateAddrSpaceCast(CntAddr,
Builder.getPtrTy(),
3526 CntAddr->
getName() +
".ascast");
3538 Cnt, ConstantInt::get(
Builder.getInt32Ty(), NumIters));
3539 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
3546 omp::Directive::OMPD_unknown,
3550 return BarrierIP1.takeError();
3556 Value *IsWarpMaster =
Builder.CreateIsNull(LaneID,
"warp_master");
3557 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
3561 auto *RedListArrayTy =
3564 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3566 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3567 {ConstantInt::get(IndexTy, 0),
3568 ConstantInt::get(IndexTy, En.index())});
3572 if (IsByRefElem && RI.DataPtrPtrGen) {
3574 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3577 return GenRes.takeError();
3588 ArrayTy, TransferMedium, {
Builder.getInt64(0), WarpID});
3593 Builder.CreateStore(Elem, MediumPtr,
3605 omp::Directive::OMPD_unknown,
3609 return BarrierIP2.takeError();
3616 Value *NumWarpsVal =
3619 Value *IsActiveThread =
3620 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
3621 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
3628 ArrayTy, TransferMedium, {
Builder.getInt64(0), GPUThreadID});
3630 Value *TargetElemPtrPtr =
3631 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
3632 {ConstantInt::get(IndexTy, 0),
3633 ConstantInt::get(IndexTy, En.index())});
3634 Value *TargetElemPtrVal =
3636 Value *TargetElemPtr = TargetElemPtrVal;
3638 if (IsByRefElem && RI.DataPtrPtrGen) {
3640 RI.DataPtrPtrGen(
Builder.saveIP(), TargetElemPtr, TargetElemPtr);
3643 return GenRes.takeError();
3645 TargetElemPtr =
Builder.CreateLoad(
Builder.getPtrTy(), TargetElemPtr);
3653 Value *SrcMediumValue =
3654 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
3655 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
3665 Cnt, ConstantInt::get(
Builder.getInt32Ty(), 1));
3666 Builder.CreateStore(Cnt, CntAddr,
false);
3668 auto *CurFn =
Builder.GetInsertBlock()->getParent();
3672 RealTySize %= TySize;
3682Expected<Function *> OpenMPIRBuilder::emitShuffleAndReduceFunction(
3685 LLVMContext &Ctx =
M.getContext();
3686 FunctionType *FuncTy =
3688 {Builder.getPtrTy(), Builder.getInt16Ty(),
3689 Builder.getInt16Ty(), Builder.getInt16Ty()},
3693 "_omp_reduction_shuffle_and_reduce_func", &
M);
3704 Builder.SetInsertPoint(EntryBB);
3715 Type *ReduceListArgType = ReduceListArg->
getType();
3719 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3720 Value *LaneIdAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3721 LaneIDArg->
getName() +
".addr");
3723 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3724 Value *AlgoVerAlloca =
Builder.CreateAlloca(LaneIDArgType,
nullptr,
3725 AlgoVerArg->
getName() +
".addr");
3732 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3734 Value *ReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3735 ReduceListAlloca, ReduceListArgType,
3736 ReduceListAlloca->
getName() +
".ascast");
3737 Value *LaneIdAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3738 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3739 Value *RemoteLaneOffsetAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3740 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3741 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3742 Value *AlgoVerAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3743 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3744 Value *RemoteListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3745 RemoteReductionListAlloca,
Builder.getPtrTy(),
3746 RemoteReductionListAlloca->
getName() +
".ascast");
3748 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3749 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3750 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3751 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3753 Value *ReduceList =
Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3754 Value *LaneId =
Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3755 Value *RemoteLaneOffset =
3756 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3757 Value *AlgoVer =
Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3764 Error EmitRedLsCpRes = emitReductionListCopy(
3766 ReduceList, RemoteListAddrCast, IsByRef,
3767 {RemoteLaneOffset,
nullptr,
nullptr});
3770 return EmitRedLsCpRes;
3795 Value *LaneComp =
Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3800 Value *Algo2AndLaneIdComp =
Builder.CreateAnd(Algo2, LaneIdComp);
3801 Value *RemoteOffsetComp =
3803 Value *CondAlgo2 =
Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3804 Value *CA0OrCA1 =
Builder.CreateOr(CondAlgo0, CondAlgo1);
3805 Value *CondReduce =
Builder.CreateOr(CA0OrCA1, CondAlgo2);
3811 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3813 Value *LocalReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3814 ReduceList,
Builder.getPtrTy());
3815 Value *RemoteReduceListPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3816 RemoteListAddrCast,
Builder.getPtrTy());
3818 ->addFnAttr(Attribute::NoUnwind);
3829 Value *LaneIdGtOffset =
Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3830 Value *CondCopy =
Builder.CreateAnd(Algo1, LaneIdGtOffset);
3835 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3839 EmitRedLsCpRes = emitReductionListCopy(
3841 RemoteListAddrCast, ReduceList, IsByRef);
3844 return EmitRedLsCpRes;
3859OpenMPIRBuilder::generateReductionDescriptor(
3861 Type *DescriptorType,
3867 Value *DescriptorSize =
3868 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(DescriptorType));
3870 DescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3871 SrcDescriptorAddr,
M.getDataLayout().getPrefTypeAlign(DescriptorType),
3875 Value *DataPtrField;
3877 DataPtrPtrGen(
Builder.saveIP(), DescriptorAddr, DataPtrField);
3880 return GenResult.takeError();
3883 DataPtr,
Builder.getPtrTy(),
".ascast"),
3889Expected<Function *> OpenMPIRBuilder::emitListToGlobalCopyFunction(
3893 LLVMContext &Ctx =
M.getContext();
3896 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3900 "_omp_reduction_list_to_global_copy_func", &
M);
3907 Builder.SetInsertPoint(EntryBlock);
3917 BufferArg->
getName() +
".addr");
3921 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3922 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3923 BufferArgAlloca,
Builder.getPtrTy(),
3924 BufferArgAlloca->
getName() +
".ascast");
3925 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3926 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3927 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
3928 ReduceListArgAlloca,
Builder.getPtrTy(),
3929 ReduceListArgAlloca->
getName() +
".ascast");
3931 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3932 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3933 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3935 Value *LocalReduceList =
3937 Value *BufferArgVal =
3941 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
3942 for (
auto En :
enumerate(ReductionInfos)) {
3944 auto *RedListArrayTy =
3948 RedListArrayTy, LocalReduceList,
3949 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3955 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3957 ReductionsBufferTy, BufferVD, 0, En.index());
3959 switch (RI.EvaluationKind) {
3961 Value *TargetElement;
3963 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3964 TargetElement =
Builder.CreateLoad(RI.ElementType, ElemPtr);
3966 if (RI.DataPtrPtrGen) {
3968 RI.DataPtrPtrGen(
Builder.saveIP(), ElemPtr, ElemPtr);
3971 return GenResult.takeError();
3975 TargetElement =
Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3978 Builder.CreateStore(TargetElement, GlobVal);
3982 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3983 RI.ElementType, ElemPtr, 0, 0,
".realp");
3985 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3987 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3989 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3991 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
3992 RI.ElementType, GlobVal, 0, 0,
".realp");
3993 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
3994 RI.ElementType, GlobVal, 0, 1,
".imagp");
3995 Builder.CreateStore(SrcReal, DestRealPtr);
3996 Builder.CreateStore(SrcImg, DestImgPtr);
4001 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(RI.ElementType));
4003 GlobVal,
M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
4004 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
4015Expected<Function *> OpenMPIRBuilder::emitListToGlobalReduceFunction(
4019 LLVMContext &Ctx =
M.getContext();
4022 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4026 "_omp_reduction_list_to_global_reduce_func", &
M);
4033 Builder.SetInsertPoint(EntryBlock);
4043 BufferArg->
getName() +
".addr");
4047 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4048 auto *RedListArrayTy =
4053 Value *LocalReduceList =
4054 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4058 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4059 BufferArgAlloca,
Builder.getPtrTy(),
4060 BufferArgAlloca->
getName() +
".ascast");
4061 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4062 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4063 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4064 ReduceListArgAlloca,
Builder.getPtrTy(),
4065 ReduceListArgAlloca->
getName() +
".ascast");
4066 Value *LocalReduceListAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4067 LocalReduceList,
Builder.getPtrTy(),
4068 LocalReduceList->
getName() +
".ascast");
4070 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4071 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4072 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4077 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4078 for (
auto En :
enumerate(ReductionInfos)) {
4081 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4082 RedListArrayTy, LocalReduceListAddrCast,
4083 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4085 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4087 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4088 ReductionsBufferTy, BufferVD, 0, En.index());
4090 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4094 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4095 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4096 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4103 Value *SrcElementPtrPtr =
4104 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
4105 {ConstantInt::get(IndexTy, 0),
4106 ConstantInt::get(IndexTy, En.index())});
4107 Value *SrcDescriptorAddr =
4112 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4113 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4116 return GenResult.takeError();
4118 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4120 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4128 ->addFnAttr(Attribute::NoUnwind);
4134Expected<Function *> OpenMPIRBuilder::emitGlobalToListCopyFunction(
4138 LLVMContext &Ctx =
M.getContext();
4141 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4145 "_omp_reduction_global_to_list_copy_func", &
M);
4152 Builder.SetInsertPoint(EntryBlock);
4162 BufferArg->
getName() +
".addr");
4166 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4167 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4168 BufferArgAlloca,
Builder.getPtrTy(),
4169 BufferArgAlloca->
getName() +
".ascast");
4170 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4171 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4172 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4173 ReduceListArgAlloca,
Builder.getPtrTy(),
4174 ReduceListArgAlloca->
getName() +
".ascast");
4175 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4176 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4177 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4179 Value *LocalReduceList =
4184 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4185 for (
auto En :
enumerate(ReductionInfos)) {
4186 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
4187 auto *RedListArrayTy =
4191 RedListArrayTy, LocalReduceList,
4192 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4197 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4198 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4199 ReductionsBufferTy, BufferVD, 0, En.index());
4205 if (!IsByRef.
empty() && IsByRef[En.index()]) {
4212 return GenResult.takeError();
4218 Value *TargetElement =
Builder.CreateLoad(ElemType, GlobValPtr);
4219 Builder.CreateStore(TargetElement, ElemPtr);
4223 Value *SrcRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4232 Value *DestRealPtr =
Builder.CreateConstInBoundsGEP2_32(
4234 Value *DestImgPtr =
Builder.CreateConstInBoundsGEP2_32(
4236 Builder.CreateStore(SrcReal, DestRealPtr);
4237 Builder.CreateStore(SrcImg, DestImgPtr);
4244 ElemPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4245 GlobValPtr,
M.getDataLayout().getPrefTypeAlign(RI.
ElementType),
4257Expected<Function *> OpenMPIRBuilder::emitGlobalToListReduceFunction(
4261 LLVMContext &Ctx =
M.getContext();
4264 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
4268 "_omp_reduction_global_to_list_reduce_func", &
M);
4275 Builder.SetInsertPoint(EntryBlock);
4285 BufferArg->
getName() +
".addr");
4289 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
4295 Value *LocalReduceList =
4296 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
4300 Value *BufferArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4301 BufferArgAlloca,
Builder.getPtrTy(),
4302 BufferArgAlloca->
getName() +
".ascast");
4303 Value *IdxArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4304 IdxArgAlloca,
Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
4305 Value *ReduceListArgAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4306 ReduceListArgAlloca,
Builder.getPtrTy(),
4307 ReduceListArgAlloca->
getName() +
".ascast");
4308 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4309 LocalReduceList,
Builder.getPtrTy(),
4310 LocalReduceList->
getName() +
".ascast");
4312 Builder.CreateStore(BufferArg, BufferArgAddrCast);
4313 Builder.CreateStore(IdxArg, IdxArgAddrCast);
4314 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
4319 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4320 for (
auto En :
enumerate(ReductionInfos)) {
4323 Value *TargetElementPtrPtr =
Builder.CreateInBoundsGEP(
4324 RedListArrayTy, ReductionList,
4325 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4328 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
4329 Value *GlobValPtr =
Builder.CreateConstInBoundsGEP2_32(
4330 ReductionsBufferTy, BufferVD, 0, En.index());
4332 if (!IsByRef.
empty() && IsByRef[En.index()] && RI.DataPtrPtrGen) {
4336 Value *ByRefAlloc =
Builder.CreateAlloca(RI.ByRefAllocatedType);
4337 ByRefAlloc =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4338 ByRefAlloc,
Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
4343 Value *ReduceListVal =
4345 Value *SrcElementPtrPtr =
4346 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceListVal,
4347 {ConstantInt::get(IndexTy, 0),
4348 ConstantInt::get(IndexTy, En.index())});
4349 Value *SrcDescriptorAddr =
4354 generateReductionDescriptor(ByRefAlloc, GlobValPtr, SrcDescriptorAddr,
4355 RI.ByRefAllocatedType, RI.DataPtrPtrGen);
4357 return GenResult.takeError();
4359 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
4361 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
4369 ->addFnAttr(Attribute::NoUnwind);
4375std::string OpenMPIRBuilder::getReductionFuncName(StringRef Name)
const {
4376 std::string Suffix =
4378 return (Name + Suffix).str();
4381Expected<Function *> OpenMPIRBuilder::createReductionFunction(
4384 AttributeList FuncAttrs) {
4386 {Builder.getPtrTy(), Builder.getPtrTy()},
4388 std::string
Name = getReductionFuncName(ReducerName);
4397 Builder.SetInsertPoint(EntryBB);
4401 Value *LHSArrayPtr =
nullptr;
4402 Value *RHSArrayPtr =
nullptr;
4409 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4411 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4412 Value *LHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4413 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
4414 Value *RHSAddrCast =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4415 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
4416 Builder.CreateStore(Arg0, LHSAddrCast);
4417 Builder.CreateStore(Arg1, RHSAddrCast);
4418 LHSArrayPtr =
Builder.CreateLoad(Arg0Type, LHSAddrCast);
4419 RHSArrayPtr =
Builder.CreateLoad(Arg1Type, RHSAddrCast);
4423 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4425 for (
auto En :
enumerate(ReductionInfos)) {
4428 RedArrayTy, RHSArrayPtr,
4429 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4431 Value *RHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4432 RHSI8Ptr, RI.PrivateVariable->getType(),
4433 RHSI8Ptr->
getName() +
".ascast");
4436 RedArrayTy, LHSArrayPtr,
4437 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4439 Value *LHSPtr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4440 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
4449 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
4450 LHS =
Builder.CreateLoad(RI.ElementType, LHSPtr);
4451 RHS =
Builder.CreateLoad(RI.ElementType, RHSPtr);
4458 return AfterIP.takeError();
4459 if (!
Builder.GetInsertBlock())
4460 return ReductionFunc;
4464 if (!IsByRef.
empty() && !IsByRef[En.index()])
4465 Builder.CreateStore(Reduced, LHSPtr);
4470 for (
auto En :
enumerate(ReductionInfos)) {
4471 unsigned Index = En.index();
4473 Value *LHSFixupPtr, *RHSFixupPtr;
4474 Builder.restoreIP(RI.ReductionGenClang(
4475 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
4480 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
4485 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
4499 return ReductionFunc;
4507 assert(RI.Variable &&
"expected non-null variable");
4508 assert(RI.PrivateVariable &&
"expected non-null private variable");
4509 assert((RI.ReductionGen || RI.ReductionGenClang) &&
4510 "expected non-null reduction generator callback");
4513 RI.Variable->getType() == RI.PrivateVariable->getType() &&
4514 "expected variables and their private equivalents to have the same "
4517 assert(RI.Variable->getType()->isPointerTy() &&
4518 "expected variables to be pointers");
4527 unsigned ReductionBufNum,
Value *SrcLocInfo) {
4541 if (ReductionInfos.
size() == 0)
4551 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4555 AttributeList FuncAttrs;
4556 AttrBuilder AttrBldr(Ctx);
4558 AttrBldr.addAttribute(Attr);
4559 AttrBldr.removeAttribute(Attribute::OptimizeNone);
4560 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
4564 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
4566 if (!ReductionResult)
4568 Function *ReductionFunc = *ReductionResult;
4572 if (GridValue.has_value())
4573 Config.setGridValue(GridValue.value());
4588 Builder.getPtrTy(
M.getDataLayout().getProgramAddressSpace());
4592 Value *ReductionListAlloca =
4593 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
4594 Value *ReductionList =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4595 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
4598 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4599 for (
auto En :
enumerate(ReductionInfos)) {
4602 RedArrayTy, ReductionList,
4603 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
4606 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
4611 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
4612 Builder.CreateStore(CastElem, ElemPtr);
4616 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
4622 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
4628 Value *RL =
Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
4640 unsigned MaxDataSize = 0;
4642 for (
auto En :
enumerate(ReductionInfos)) {
4646 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
4647 ? En.value().ByRefElementType
4648 : En.value().ElementType;
4649 auto Size =
M.getDataLayout().getTypeStoreSize(RedTypeArg);
4650 if (
Size > MaxDataSize)
4654 Value *ReductionDataSize =
4655 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
4656 if (!IsTeamsReduction) {
4657 Value *SarFuncCast =
4658 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
4660 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
4661 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
4664 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
4669 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
4671 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
4674 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4679 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4684 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
4689 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
4696 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
4698 Value *Args3[] = {SrcLocInfo,
4699 KernelTeamsReductionPtr,
4700 Builder.getInt32(ReductionBufNum),
4711 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
4728 for (
auto En :
enumerate(ReductionInfos)) {
4736 Value *LHSPtr, *RHSPtr;
4738 &LHSPtr, &RHSPtr, CurFunc));
4744 RedValue =
Builder.CreatePointerBitCastOrAddrSpaceCast(
4746 if (RHSPtr->
getType() != RHS->getType())
4748 Builder.CreatePointerBitCastOrAddrSpaceCast(RHS, RHSPtr->
getType());
4759 if (IsByRef.
empty() || !IsByRef[En.index()]) {
4761 "red.value." +
Twine(En.index()));
4772 if (!IsByRef.
empty() && !IsByRef[En.index()])
4777 if (ContinuationBlock) {
4778 Builder.CreateBr(ContinuationBlock);
4779 Builder.SetInsertPoint(ContinuationBlock);
4781 Config.setEmitLLVMUsed();
4792 ".omp.reduction.func", &M);
4802 Builder.SetInsertPoint(ReductionFuncBlock);
4803 Value *LHSArrayPtr =
nullptr;
4804 Value *RHSArrayPtr =
nullptr;
4815 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4817 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4818 Value *LHSAddrCast =
4819 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4820 Value *RHSAddrCast =
4821 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4822 Builder.CreateStore(Arg0, LHSAddrCast);
4823 Builder.CreateStore(Arg1, RHSAddrCast);
4824 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4825 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4827 LHSArrayPtr = ReductionFunc->
getArg(0);
4828 RHSArrayPtr = ReductionFunc->
getArg(1);
4831 unsigned NumReductions = ReductionInfos.
size();
4834 for (
auto En :
enumerate(ReductionInfos)) {
4836 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4837 RedArrayTy, LHSArrayPtr, 0, En.index());
4838 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4839 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4842 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4843 RedArrayTy, RHSArrayPtr, 0, En.index());
4844 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4845 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4854 Builder.restoreIP(*AfterIP);
4856 if (!Builder.GetInsertBlock())
4860 if (!IsByRef[En.index()])
4861 Builder.CreateStore(Reduced, LHSPtr);
4863 Builder.CreateRetVoid();
4870 bool IsNoWait,
bool IsTeamsReduction) {
4874 IsByRef, IsNoWait, IsTeamsReduction);
4881 if (ReductionInfos.
size() == 0)
4891 unsigned NumReductions = ReductionInfos.
size();
4894 Value *RedArray =
Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4896 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4898 for (
auto En :
enumerate(ReductionInfos)) {
4899 unsigned Index = En.index();
4901 Value *RedArrayElemPtr =
Builder.CreateConstInBoundsGEP2_64(
4902 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4909 M.getDataLayout(),
M.getDataLayout().getDefaultGlobalsAddressSpace());
4919 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4924 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4925 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4927 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4929 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4930 : RuntimeFunction::OMPRTL___kmpc_reduce);
4933 {Ident, ThreadId, NumVariables, RedArraySize,
4934 RedArray, ReductionFunc, Lock},
4945 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4946 Switch->addCase(
Builder.getInt32(1), NonAtomicRedBlock);
4947 Switch->addCase(
Builder.getInt32(2), AtomicRedBlock);
4952 Builder.SetInsertPoint(NonAtomicRedBlock);
4953 for (
auto En :
enumerate(ReductionInfos)) {
4959 if (!IsByRef[En.index()]) {
4961 "red.value." +
Twine(En.index()));
4963 Value *PrivateRedValue =
4965 "red.private.value." +
Twine(En.index()));
4973 if (!
Builder.GetInsertBlock())
4976 if (!IsByRef[En.index()])
4980 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4981 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4983 Builder.CreateBr(ContinuationBlock);
4988 Builder.SetInsertPoint(AtomicRedBlock);
4989 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4996 if (!
Builder.GetInsertBlock())
4999 Builder.CreateBr(ContinuationBlock);
5012 if (!
Builder.GetInsertBlock())
5015 Builder.SetInsertPoint(ContinuationBlock);
5026 Directive OMPD = Directive::OMPD_master;
5031 Value *Args[] = {Ident, ThreadId};
5039 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5050 Directive OMPD = Directive::OMPD_masked;
5056 Value *ArgsEnd[] = {Ident, ThreadId};
5064 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
5074 Call->setDoesNotThrow();
5089 bool IsInclusive,
ScanInfo *ScanRedInfo) {
5091 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
5092 ScanVarsType, ScanRedInfo);
5103 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5106 Type *DestTy = ScanVarsType[i];
5107 Value *Val =
Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5110 Builder.CreateStore(Src, Val);
5115 Builder.GetInsertBlock()->getParent());
5118 IV = ScanRedInfo->
IV;
5121 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5124 Type *DestTy = ScanVarsType[i];
5126 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5128 Builder.CreateStore(Src, ScanVars[i]);
5142 Builder.GetInsertBlock()->getParent());
5147Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
5151 Builder.restoreIP(AllocaIP);
5153 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5155 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
5162 Builder.restoreIP(CodeGenIP);
5164 Builder.CreateAdd(ScanRedInfo->
Span, Builder.getInt32(1));
5165 for (
size_t i = 0; i < ScanVars.
size(); i++) {
5169 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
5170 AllocSpan,
nullptr,
"arr");
5171 Builder.CreateStore(Buff, (*(ScanRedInfo->
ScanBuffPtrs))[ScanVars[i]]);
5189 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5198Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
5204 Value *PrivateVar = RedInfo.PrivateVariable;
5205 Value *OrigVar = RedInfo.Variable;
5209 Type *SrcTy = RedInfo.ElementType;
5214 Builder.CreateStore(Src, OrigVar);
5237 Builder.SetInsertPoint(
Builder.GetInsertBlock()->getTerminator());
5262 Builder.GetInsertBlock()->getModule(),
5269 Builder.GetInsertBlock()->getModule(),
5275 llvm::ConstantInt::get(ScanRedInfo->
Span->
getType(), 1));
5276 Builder.SetInsertPoint(InputBB);
5279 Builder.SetInsertPoint(LoopBB);
5295 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5297 Builder.SetInsertPoint(InnerLoopBB);
5301 Value *ReductionVal = RedInfo.PrivateVariable;
5304 Type *DestTy = RedInfo.ElementType;
5307 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
5310 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
5315 RedInfo.ReductionGen(
Builder.saveIP(), LHS, RHS, Result);
5318 Builder.CreateStore(Result, LHSPtr);
5321 IVal, llvm::ConstantInt::get(
Builder.getInt32Ty(), 1));
5323 CmpI =
Builder.CreateICmpUGE(NextIVal, Pow2K);
5324 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
5327 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
5333 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
5354 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
5361Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
5373 Error Err = InputLoopGen();
5384 Error Err = ScanLoopGen(Builder.saveIP());
5391void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
5428 Builder.SetInsertPoint(Preheader);
5431 Builder.SetInsertPoint(Header);
5432 PHINode *IndVarPHI =
Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
5433 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
5438 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
5439 Builder.CreateCondBr(Cmp, Body, Exit);
5444 Builder.SetInsertPoint(Latch);
5446 "omp_" + Name +
".next",
true);
5457 CL->Header = Header;
5476 NextBB, NextBB, Name);
5508 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5517 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5518 ScanRedInfo->
Span = TripCount;
5524 ScanRedInfo->
IV =
IV;
5525 createScanBBs(ScanRedInfo);
5528 assert(Terminator->getNumSuccessors() == 1);
5529 BasicBlock *ContinueBlock = Terminator->getSuccessor(0);
5532 Builder.GetInsertBlock()->getParent());
5535 Builder.GetInsertBlock()->getParent());
5536 Builder.CreateBr(ContinueBlock);
5542 const auto &&InputLoopGen = [&]() ->
Error {
5544 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
5545 ComputeIP, Name,
true, ScanRedInfo);
5549 Builder.restoreIP((*LoopInfo)->getAfterIP());
5555 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
5559 Builder.restoreIP((*LoopInfo)->getAfterIP());
5563 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
5571 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
5581 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
5582 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
5586 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
5602 Incr =
Builder.CreateSelect(IsNeg,
Builder.CreateNeg(Step), Step);
5605 Span =
Builder.CreateSub(UB, LB,
"",
false,
true);
5609 Span =
Builder.CreateSub(Stop, Start,
"",
true);
5614 Value *CountIfLooping;
5615 if (InclusiveStop) {
5616 CountIfLooping =
Builder.CreateAdd(
Builder.CreateUDiv(Span, Incr), One);
5622 CountIfLooping =
Builder.CreateSelect(OneCmp, One, CountIfTwo);
5625 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
5626 "omp_" + Name +
".tripcount");
5631 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
5638 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
5645 ScanRedInfo->
IV = IndVar;
5646 return BodyGenCB(
Builder.saveIP(), IndVar);
5652 Builder.getCurrentDebugLocation());
5663 unsigned Bitwidth = Ty->getIntegerBitWidth();
5666 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
5669 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
5679 unsigned Bitwidth = Ty->getIntegerBitWidth();
5682 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
5685 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
5693 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5695 "Require dedicated allocate IP");
5701 uint32_t SrcLocStrSize;
5705 case WorksharingLoopType::ForStaticLoop:
5706 Flag = OMP_IDENT_FLAG_WORK_LOOP;
5708 case WorksharingLoopType::DistributeStaticLoop:
5709 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5711 case WorksharingLoopType::DistributeForStaticLoop:
5712 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
5719 Type *IVTy =
IV->getType();
5720 FunctionCallee StaticInit =
5721 LoopType == WorksharingLoopType::DistributeForStaticLoop
5724 FunctionCallee StaticFini =
5728 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5731 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5732 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5733 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5734 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5743 Constant *One = ConstantInt::get(IVTy, 1);
5744 Builder.CreateStore(Zero, PLowerBound);
5746 Builder.CreateStore(UpperBound, PUpperBound);
5747 Builder.CreateStore(One, PStride);
5753 (LoopType == WorksharingLoopType::DistributeStaticLoop)
5754 ? OMPScheduleType::OrderedDistribute
5757 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5761 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5762 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
5765 PLowerBound, PUpperBound});
5766 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5767 Value *PDistUpperBound =
5768 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
5769 Args.push_back(PDistUpperBound);
5774 BuildInitCall(SchedulingType,
Builder);
5775 if (HasDistSchedule &&
5776 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5777 Constant *DistScheduleSchedType = ConstantInt::get(
5782 BuildInitCall(DistScheduleSchedType,
Builder);
5784 Value *LowerBound =
Builder.CreateLoad(IVTy, PLowerBound);
5785 Value *InclusiveUpperBound =
Builder.CreateLoad(IVTy, PUpperBound);
5786 Value *TripCountMinusOne =
Builder.CreateSub(InclusiveUpperBound, LowerBound);
5787 Value *TripCount =
Builder.CreateAdd(TripCountMinusOne, One);
5788 CLI->setTripCount(TripCount);
5794 CLI->mapIndVar([&](Instruction *OldIV) ->
Value * {
5798 return Builder.CreateAdd(OldIV, LowerBound);
5810 omp::Directive::OMPD_for,
false,
5813 return BarrierIP.takeError();
5840 Reachable.insert(
Block);
5850 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
5854OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5858 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
5859 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5864 Type *IVTy =
IV->getType();
5866 "Max supported tripcount bitwidth is 64 bits");
5868 :
Type::getInt64Ty(Ctx);
5871 Constant *One = ConstantInt::get(InternalIVTy, 1);
5877 for (BasicBlock &BB : *
F)
5878 if (!BB.hasTerminator())
5879 UIs.
push_back(
new UnreachableInst(
F->getContext(), &BB));
5884 LoopInfo &&LI = LIA.
run(*
F,
FAM);
5885 for (Instruction *
I : UIs)
5886 I->eraseFromParent();
5889 if (ChunkSize || DistScheduleChunkSize)
5894 FunctionCallee StaticInit =
5896 FunctionCallee StaticFini =
5902 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5903 Value *PLowerBound =
5904 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5905 Value *PUpperBound =
5906 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5907 Value *PStride =
Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5916 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5917 Value *CastedDistScheduleChunkSize =
Builder.CreateZExtOrTrunc(
5918 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5919 "distschedulechunksize");
5920 Value *CastedTripCount =
5921 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5924 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5926 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
5927 Builder.CreateStore(Zero, PLowerBound);
5928 Value *OrigUpperBound =
Builder.CreateSub(CastedTripCount, One);
5929 Value *IsTripCountZero =
Builder.CreateICmpEQ(CastedTripCount, Zero);
5931 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5932 Builder.CreateStore(UpperBound, PUpperBound);
5933 Builder.CreateStore(One, PStride);
5937 uint32_t SrcLocStrSize;
5940 if (DistScheduleSchedType != OMPScheduleType::None) {
5941 Flag |= OMP_IDENT_FLAG_WORK_DISTRIBUTE;
5946 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5947 PUpperBound, PStride, One,
5948 this](
Value *SchedulingType,
Value *ChunkSize,
5951 StaticInit, {SrcLoc, ThreadNum,
5952 SchedulingType, PLastIter,
5953 PLowerBound, PUpperBound,
5957 BuildInitCall(SchedulingType, CastedChunkSize,
Builder);
5958 if (DistScheduleSchedType != OMPScheduleType::None &&
5959 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5960 SchedType != OMPScheduleType::OrderedDistribute) {
5964 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize,
Builder);
5968 Value *FirstChunkStart =
5969 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5970 Value *FirstChunkStop =
5971 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5972 Value *FirstChunkEnd =
Builder.CreateAdd(FirstChunkStop, One);
5974 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5975 Value *NextChunkStride =
5976 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
5980 Value *DispatchCounter;
5988 DispatchCounter = Counter;
5991 FirstChunkStart, CastedTripCount, NextChunkStride,
6014 Value *ChunkEnd =
Builder.CreateAdd(DispatchCounter, ChunkRange);
6015 Value *IsLastChunk =
6016 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
6017 Value *CountUntilOrigTripCount =
6018 Builder.CreateSub(CastedTripCount, DispatchCounter);
6020 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
6021 Value *BackcastedChunkTC =
6022 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
6023 CLI->setTripCount(BackcastedChunkTC);
6028 Value *BackcastedDispatchCounter =
6029 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
6030 CLI->mapIndVar([&](Instruction *) ->
Value * {
6032 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
6045 return AfterIP.takeError();
6060static FunctionCallee
6063 unsigned Bitwidth = Ty->getIntegerBitWidth();
6066 case WorksharingLoopType::ForStaticLoop:
6069 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
6072 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
6074 case WorksharingLoopType::DistributeStaticLoop:
6077 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
6080 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
6082 case WorksharingLoopType::DistributeForStaticLoop:
6085 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
6088 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
6091 if (Bitwidth != 32 && Bitwidth != 64) {
6103 Function &LoopBodyFn,
bool NoLoop) {
6114 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
6115 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6116 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6117 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6122 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
6123 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
6127 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
6128 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6129 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
6130 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
6131 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
6133 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
6157 Builder.restoreIP({Preheader, Preheader->
end()});
6160 Builder.CreateBr(CLI->
getExit());
6168 CleanUpInfo.
collectBlocks(RegionBlockSet, BlocksToBeRemoved);
6176 "Expected unique undroppable user of outlined function");
6178 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
6180 "Expected outlined function call to be located in loop preheader");
6182 if (OutlinedFnCallInstruction->
arg_size() > 1)
6189 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
6191 for (
auto &ToBeDeletedItem : ToBeDeleted)
6192 ToBeDeletedItem->eraseFromParent();
6199 uint32_t SrcLocStrSize;
6203 case WorksharingLoopType::ForStaticLoop:
6204 Flag = OMP_IDENT_FLAG_WORK_LOOP;
6206 case WorksharingLoopType::DistributeStaticLoop:
6207 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE;
6209 case WorksharingLoopType::DistributeForStaticLoop:
6210 Flag = OMP_IDENT_FLAG_WORK_DISTRIBUTE | OMP_IDENT_FLAG_WORK_LOOP;
6215 auto OI = std::make_unique<OutlineInfo>();
6220 SmallVector<Instruction *, 4> ToBeDeleted;
6222 OI->OuterAllocBB = AllocaIP.getBlock();
6245 SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
6247 OI->collectBlocks(ParallelRegionBlockSet, Blocks);
6249 CodeExtractorAnalysisCache CEAC(*OuterFn);
6250 CodeExtractor Extractor(Blocks,
6264 SetVector<Value *> SinkingCands, HoistingCands;
6268 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
6275 for (
auto Use :
Users) {
6277 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
6278 Inst->replaceUsesOfWith(CLI->
getIndVar(), NewLoopCntLoad);
6284 OI->ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
6291 OI->PostOutlineCB = [=, ToBeDeletedVec =
6292 std::move(ToBeDeleted)](
Function &OutlinedFn) {
6302 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
6303 bool HasSimdModifier,
bool HasMonotonicModifier,
6304 bool HasNonmonotonicModifier,
bool HasOrderedClause,
6306 Value *DistScheduleChunkSize) {
6307 if (
Config.isTargetDevice())
6308 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
6310 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
6311 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
6313 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
6314 OMPScheduleType::ModifierOrdered;
6316 if (HasDistSchedule) {
6317 DistScheduleSchedType = DistScheduleChunkSize
6318 ? OMPScheduleType::OrderedDistributeChunked
6319 : OMPScheduleType::OrderedDistribute;
6321 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
6322 case OMPScheduleType::BaseStatic:
6323 case OMPScheduleType::BaseDistribute:
6324 assert((!ChunkSize || !DistScheduleChunkSize) &&
6325 "No chunk size with static-chunked schedule");
6326 if (IsOrdered && !HasDistSchedule)
6327 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6328 NeedsBarrier, ChunkSize);
6330 if (DistScheduleChunkSize)
6331 return applyStaticChunkedWorkshareLoop(
6332 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6333 DistScheduleChunkSize, DistScheduleSchedType);
6334 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
6337 case OMPScheduleType::BaseStaticChunked:
6338 case OMPScheduleType::BaseDistributeChunked:
6339 if (IsOrdered && !HasDistSchedule)
6340 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6341 NeedsBarrier, ChunkSize);
6343 return applyStaticChunkedWorkshareLoop(
6344 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
6345 DistScheduleChunkSize, DistScheduleSchedType);
6347 case OMPScheduleType::BaseRuntime:
6348 case OMPScheduleType::BaseAuto:
6349 case OMPScheduleType::BaseGreedy:
6350 case OMPScheduleType::BaseBalanced:
6351 case OMPScheduleType::BaseSteal:
6352 case OMPScheduleType::BaseRuntimeSimd:
6354 "schedule type does not support user-defined chunk sizes");
6356 case OMPScheduleType::BaseGuidedSimd:
6357 case OMPScheduleType::BaseDynamicChunked:
6358 case OMPScheduleType::BaseGuidedChunked:
6359 case OMPScheduleType::BaseGuidedIterativeChunked:
6360 case OMPScheduleType::BaseGuidedAnalyticalChunked:
6361 case OMPScheduleType::BaseStaticBalancedChunked:
6362 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
6363 NeedsBarrier, ChunkSize);
6376 unsigned Bitwidth = Ty->getIntegerBitWidth();
6379 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
6382 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
6390static FunctionCallee
6392 unsigned Bitwidth = Ty->getIntegerBitWidth();
6395 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
6398 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
6405static FunctionCallee
6407 unsigned Bitwidth = Ty->getIntegerBitWidth();
6410 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
6413 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
6418OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
6421 bool NeedsBarrier,
Value *Chunk) {
6422 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
6424 "Require dedicated allocate IP");
6426 "Require valid schedule type");
6428 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
6429 OMPScheduleType::ModifierOrdered;
6434 uint32_t SrcLocStrSize;
6441 Type *IVTy =
IV->getType();
6446 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
6448 Value *PLastIter =
Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
6449 Value *PLowerBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
6450 Value *PUpperBound =
Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
6451 Value *PStride =
Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
6460 Constant *One = ConstantInt::get(IVTy, 1);
6461 Builder.CreateStore(One, PLowerBound);
6463 Builder.CreateStore(UpperBound, PUpperBound);
6464 Builder.CreateStore(One, PStride);
6482 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
6494 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
6497 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
6498 Constant *Zero32 = ConstantInt::get(I32Type, 0);
6501 Builder.CreateSub(
Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
6502 Builder.CreateCondBr(MoreWork, Header, Exit);
6508 PI->setIncomingBlock(0, OuterCond);
6509 PI->setIncomingValue(0, LowerBound);
6514 Br->setSuccessor(OuterCond);
6520 UpperBound =
Builder.CreateLoad(IVTy, PUpperBound,
"ub");
6523 CI->setOperand(1, UpperBound);
6527 assert(BI->getSuccessor(1) == Exit);
6528 BI->setSuccessor(1, OuterCond);
6542 omp::Directive::OMPD_for,
false,
6545 return BarrierIP.takeError();
6564 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
6569 if (BBsToErase.
count(UseInst->getParent()))
6576 while (BBsToErase.
remove_if(HasRemainingUses)) {
6587 assert(
Loops.size() >= 1 &&
"At least one loop required");
6588 size_t NumLoops =
Loops.size();
6592 return Loops.front();
6604 Loop->collectControlBlocks(OldControlBBs);
6608 if (ComputeIP.
isSet())
6615 Value *CollapsedTripCount =
nullptr;
6618 "All loops to collapse must be valid canonical loops");
6619 Value *OrigTripCount = L->getTripCount();
6620 if (!CollapsedTripCount) {
6621 CollapsedTripCount = OrigTripCount;
6626 CollapsedTripCount =
6627 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
6633 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
6639 Builder.restoreIP(Result->getBodyIP());
6641 Value *Leftover = Result->getIndVar();
6643 NewIndVars.
resize(NumLoops);
6644 for (
int i = NumLoops - 1; i >= 1; --i) {
6645 Value *OrigTripCount =
Loops[i]->getTripCount();
6647 Value *NewIndVar =
Builder.CreateURem(Leftover, OrigTripCount);
6648 NewIndVars[i] = NewIndVar;
6650 Leftover =
Builder.CreateUDiv(Leftover, OrigTripCount);
6653 NewIndVars[0] = Leftover;
6662 BasicBlock *ContinueBlock = Result->getBody();
6664 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
6671 ContinueBlock =
nullptr;
6672 ContinuePred = NextSrc;
6679 for (
size_t i = 0; i < NumLoops - 1; ++i)
6680 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
6686 for (
size_t i = NumLoops - 1; i > 0; --i)
6687 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
6690 ContinueWith(Result->getLatch(),
nullptr);
6697 for (
size_t i = 0; i < NumLoops; ++i)
6698 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
6712std::vector<CanonicalLoopInfo *>
6716 "Must pass as many tile sizes as there are loops");
6717 int NumLoops =
Loops.size();
6718 assert(NumLoops >= 1 &&
"At least one loop to tile required");
6730 Loop->collectControlBlocks(OldControlBBs);
6738 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6739 OrigTripCounts.
push_back(L->getTripCount());
6750 for (
int i = 0; i < NumLoops - 1; ++i) {
6763 for (
int i = 0; i < NumLoops; ++i) {
6765 Value *OrigTripCount = OrigTripCounts[i];
6778 Value *FloorTripOverflow =
6779 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
6781 FloorTripOverflow =
Builder.CreateZExt(FloorTripOverflow, IVType);
6782 Value *FloorTripCount =
6783 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
6784 "omp_floor" +
Twine(i) +
".tripcount",
true);
6787 FloorCompleteCount.
push_back(FloorCompleteTripCount);
6793 std::vector<CanonicalLoopInfo *> Result;
6794 Result.reserve(NumLoops * 2);
6807 auto EmbeddNewLoop =
6808 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6811 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6816 Enter = EmbeddedLoop->
getBody();
6818 OutroInsertBefore = EmbeddedLoop->
getLatch();
6819 return EmbeddedLoop;
6823 const Twine &NameBase) {
6826 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6827 Result.push_back(EmbeddedLoop);
6831 EmbeddNewLoops(FloorCount,
"floor");
6837 for (
int i = 0; i < NumLoops; ++i) {
6841 Value *FloorIsEpilogue =
6843 Value *TileTripCount =
6850 EmbeddNewLoops(TileCounts,
"tile");
6855 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6864 BodyEnter =
nullptr;
6865 BodyEntered = ExitBB;
6877 Builder.restoreIP(Result.back()->getBodyIP());
6878 for (
int i = 0; i < NumLoops; ++i) {
6881 Value *OrigIndVar = OrigIndVars[i];
6909 if (Properties.
empty())
6932 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6936 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6944 if (
I.mayReadOrWriteMemory()) {
6948 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6962 Loop->collectControlBlocks(oldControlBBs);
6967 assert(L->isValid() &&
"All input loops must be valid canonical loops");
6968 origTripCounts.
push_back(L->getTripCount());
6977 Builder.SetInsertPoint(TCBlock);
6978 Value *fusedTripCount =
nullptr;
6980 assert(L->isValid() &&
"All loops to fuse must be valid canonical loops");
6981 Value *origTripCount = L->getTripCount();
6982 if (!fusedTripCount) {
6983 fusedTripCount = origTripCount;
6986 Value *condTP =
Builder.CreateICmpSGT(fusedTripCount, origTripCount);
6987 fusedTripCount =
Builder.CreateSelect(condTP, fusedTripCount, origTripCount,
7001 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7002 Loops[i]->getPreheader()->moveBefore(TCBlock);
7003 Loops[i]->getAfter()->moveBefore(TCBlock);
7007 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7019 for (
size_t i = 0; i <
Loops.size(); ++i) {
7021 F->getContext(),
"omp.fused.inner.cond",
F,
Loops[i]->getBody());
7022 Builder.SetInsertPoint(condBlock);
7030 for (
size_t i = 0; i <
Loops.size() - 1; ++i) {
7031 Builder.SetInsertPoint(condBBs[i]);
7032 Builder.CreateCondBr(condValues[i],
Loops[i]->getBody(), condBBs[i + 1]);
7048 "omp.fused.pre_latch");
7081 const Twine &NamePrefix) {
7110 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
7112 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->
getExit());
7115 Builder.SetInsertPoint(SplitBeforeIt);
7117 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
7120 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
7123 Builder.SetInsertPoint(ElseBlock);
7129 ExistingBlocks.
reserve(L->getNumBlocks() + 1);
7131 ExistingBlocks.
append(L->block_begin(), L->block_end());
7137 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
7139 if (
Block == L->getLoopPreheader() ||
Block == L->getLoopLatch() ||
7146 if (
Block == ThenBlock)
7147 NewBB->
setName(NamePrefix +
".if.else");
7150 VMap[
Block] = NewBB;
7158 L->getLoopLatch()->splitBasicBlockBefore(
L->getLoopLatch()->begin(),
7159 NamePrefix +
".pre_latch");
7163 L->addBasicBlockToLoop(ThenBlock, LI);
7169 if (TargetTriple.
isX86()) {
7170 if (Features.
lookup(
"avx512f"))
7172 else if (Features.
lookup(
"avx"))
7176 if (TargetTriple.
isPPC())
7178 if (TargetTriple.
isWasm())
7185 Value *IfCond, OrderKind Order,
7195 if (!BB.hasTerminator())
7211 I->eraseFromParent();
7214 if (AlignedVars.
size()) {
7216 for (
auto &AlignedItem : AlignedVars) {
7217 Value *AlignedPtr = AlignedItem.first;
7218 Value *Alignment = AlignedItem.second;
7221 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
7229 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
7242 Reachable.insert(
Block);
7252 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
7268 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
7270 if (Simdlen || Safelen) {
7274 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
7300static std::unique_ptr<TargetMachine>
7304 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
7305 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
7316 std::nullopt, OptLevel));
7334 if (!BB.hasTerminator())
7347 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
7348 FAM.registerPass([&]() {
return TIRA; });
7362 I->eraseFromParent();
7365 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
7370 nullptr, ORE,
static_cast<int>(OptLevel),
7391 <<
" Threshold=" << UP.
Threshold <<
"\n"
7394 <<
" PartialOptSizeThreshold="
7414 Ptr = Load->getPointerOperand();
7416 Ptr = Store->getPointerOperand();
7423 if (Alloca->getParent() == &
F->getEntryBlock())
7443 int MaxTripCount = 0;
7444 bool MaxOrZero =
false;
7445 unsigned TripMultiple = 0;
7448 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP);
7449 unsigned Factor = UP.
Count;
7450 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
7461 assert(Factor >= 0 &&
"Unroll factor must not be negative");
7477 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
7490 *UnrolledCLI =
Loop;
7495 "unrolling only makes sense with a factor of 2 or larger");
7497 Type *IndVarTy =
Loop->getIndVarType();
7504 std::vector<CanonicalLoopInfo *>
LoopNest =
7519 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
7522 (*UnrolledCLI)->assertOK();
7540 Value *Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
7559 if (!CPVars.
empty()) {
7564 Directive OMPD = Directive::OMPD_single;
7569 Value *Args[] = {Ident, ThreadId};
7578 if (
Error Err = FiniCB(IP))
7599 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
7606 for (
size_t I = 0, E = CPVars.
size();
I < E; ++
I)
7609 ConstantInt::get(Int64, 0), CPVars[
I],
7612 }
else if (!IsNowait) {
7615 omp::Directive::OMPD_unknown,
false,
7633 Directive::OMPD_scope,
nullptr,
nullptr,
7634 BodyGenCB, FiniCB,
false,
true,
7642 omp::Directive::OMPD_unknown,
7658 Directive OMPD = Directive::OMPD_critical;
7663 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
7664 Value *Args[] = {Ident, ThreadId, LockVar};
7681 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7689 const Twine &Name,
bool IsDependSource) {
7693 "OpenMP runtime requires depend vec with i64 type");
7706 for (
unsigned I = 0;
I < NumLoops; ++
I) {
7720 Value *Args[] = {Ident, ThreadId, DependBaseAddrGEP};
7738 Directive OMPD = Directive::OMPD_ordered;
7747 Value *Args[] = {Ident, ThreadId};
7757 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
7764 bool HasFinalize,
bool IsCancellable) {
7771 BasicBlock *EntryBB = Builder.GetInsertBlock();
7780 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
7792 "Unexpected control flow graph state!!");
7794 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
7796 return AfterIP.takeError();
7801 "Unexpected Insertion point location!");
7804 auto InsertBB = merged ? ExitPredBB : ExitBB;
7807 Builder.SetInsertPoint(InsertBB);
7809 return Builder.saveIP();
7813 Directive OMPD,
Value *EntryCall, BasicBlock *ExitBB,
bool Conditional) {
7815 if (!Conditional || !EntryCall)
7821 auto *UI =
new UnreachableInst(
Builder.getContext(), ThenBB);
7831 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
7835 UI->eraseFromParent();
7843 omp::Directive OMPD,
InsertPointTy FinIP, Instruction *ExitCall,
7851 "Unexpected finalization stack state!");
7854 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
7856 if (
Error Err = Fi.mergeFiniBB(
Builder, FinIP.getBlock()))
7857 return std::move(Err);
7861 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
7871 return IRBuilder<>::InsertPoint(ExitCall->
getParent(),
7905 "copyin.not.master.end");
7912 Builder.SetInsertPoint(OMP_Entry);
7913 Value *MasterPtr =
Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
7914 Value *PrivatePtr =
Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
7915 Value *cmp =
Builder.CreateICmpNE(MasterPtr, PrivatePtr);
7916 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
7918 Builder.SetInsertPoint(CopyBegin);
7936 Value *Args[] = {ThreadId,
Size, Allocator};
7959 return Builder.CreateCall(Fn, Args, Name);
7973 Value *Args[] = {ThreadId, Addr, Allocator};
7980 const Twine &Name) {
7988 M.getContext(),
M.getDataLayout().getPrefTypeAlign(Int64)));
7994 const Twine &Name) {
7996 Loc,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)), Name);
8001 const Twine &Name) {
8007 return Builder.CreateCall(Fn, Args, Name);
8012 const Twine &Name) {
8014 Loc, Addr,
Builder.getInt64(
M.getDataLayout().getTypeAllocSize(VarType)),
8021 Value *DependenceAddress,
bool HaveNowaitClause) {
8029 if (Device ==
nullptr)
8031 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
8032 if (NumDependences ==
nullptr) {
8033 NumDependences = ConstantInt::get(Int32, 0);
8037 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8039 Ident, ThreadId, InteropVar, InteropTypeVal,
8040 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
8049 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
8057 if (Device ==
nullptr)
8059 if (NumDependences ==
nullptr) {
8060 NumDependences = ConstantInt::get(Int32, 0);
8064 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8066 Ident, ThreadId, InteropVar, Device,
8067 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8076 Value *NumDependences,
8077 Value *DependenceAddress,
8078 bool HaveNowaitClause) {
8085 if (Device ==
nullptr)
8087 if (NumDependences ==
nullptr) {
8088 NumDependences = ConstantInt::get(Int32, 0);
8092 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
8094 Ident, ThreadId, InteropVar, Device,
8095 NumDependences, DependenceAddress, HaveNowaitClauseVal};
8125 assert(!Attrs.MaxThreads.empty() && !Attrs.MaxTeams.empty() &&
8126 "expected num_threads and num_teams to be specified");
8146 const std::string DebugPrefix =
"_debug__";
8147 if (KernelName.
ends_with(DebugPrefix)) {
8148 KernelName = KernelName.
drop_back(DebugPrefix.length());
8149 Kernel =
M.getFunction(KernelName);
8155 if (Attrs.MinTeams > 1 || Attrs.MaxTeams.front() > 0)
8160 int32_t MaxThreadsVal = Attrs.MaxThreads.front();
8161 if (MaxThreadsVal < 0) {
8167 MaxThreadsVal = Attrs.MinThreads;
8171 if (MaxThreadsVal > 0)
8184 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
8187 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
8188 Constant *DynamicEnvironmentInitializer =
8192 DynamicEnvironmentInitializer, DynamicEnvironmentName,
8194 DL.getDefaultGlobalsAddressSpace());
8198 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
8199 ? DynamicEnvironmentGV
8201 DynamicEnvironmentPtr);
8204 ConfigurationEnvironment, {
8205 UseGenericStateMachineVal,
8206 MayUseNestedParallelismVal,
8213 ReductionBufferLength,
8216 KernelEnvironment, {
8217 ConfigurationEnvironmentInitializer,
8221 std::string KernelEnvironmentName =
8222 (KernelName +
"_kernel_environment").str();
8225 KernelEnvironmentInitializer, KernelEnvironmentName,
8227 DL.getDefaultGlobalsAddressSpace());
8231 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
8232 ? KernelEnvironmentGV
8234 KernelEnvironmentPtr);
8235 Value *KernelLaunchEnvironment =
8238 KernelLaunchEnvironment =
8239 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
8240 ? KernelLaunchEnvironment
8241 :
Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
8242 KernelLaunchEnvParamTy);
8244 Fn, {KernelEnvironment, KernelLaunchEnvironment});
8256 auto *UI =
Builder.CreateUnreachable();
8262 Builder.SetInsertPoint(WorkerExitBB);
8266 Builder.SetInsertPoint(CheckBBTI);
8267 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
8269 CheckBBTI->eraseFromParent();
8270 UI->eraseFromParent();
8278 int32_t TeamsReductionDataSize,
8279 int32_t TeamsReductionBufferLength) {
8284 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
8288 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
8294 const std::string DebugPrefix =
"_debug__";
8296 KernelName = KernelName.
drop_back(DebugPrefix.length());
8297 auto *KernelEnvironmentGV =
8298 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
8299 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
8300 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->getInitializer();
8302 KernelEnvironmentInitializer,
8303 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
8305 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
8307 KernelEnvironmentGV->setInitializer(NewInitializer);
8312 if (
Kernel.hasFnAttribute(Name)) {
8313 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
8319std::pair<int32_t, int32_t>
8321 int32_t ThreadLimit =
8322 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
8325 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
8326 if (!Attr.isValid() || !Attr.isStringAttribute())
8327 return {0, ThreadLimit};
8328 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
8331 return {0, ThreadLimit};
8332 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
8340 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
8342 return {0, ThreadLimit};
8348 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
8351 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
8359std::pair<int32_t, int32_t>
8362 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
8366 int32_t LB, int32_t UB) {
8374 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
8377void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
8386 else if (
T.isNVPTX())
8388 else if (
T.isSPIRV())
8393Constant *OpenMPIRBuilder::createOutlinedFunctionID(Function *OutlinedFn,
8394 StringRef EntryFnIDName) {
8395 if (
Config.isTargetDevice()) {
8396 assert(OutlinedFn &&
"The outlined function must exist if embedded");
8400 return new GlobalVariable(
8405Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(Function *OutlinedFn,
8406 StringRef EntryFnName) {
8410 assert(!
M.getGlobalVariable(EntryFnName,
true) &&
8411 "Named kernel already exists?");
8412 return new GlobalVariable(
8425 if (
Config.isTargetDevice() || !
Config.openMPOffloadMandatory()) {
8429 OutlinedFn = *CBResult;
8431 OutlinedFn =
nullptr;
8437 if (!IsOffloadEntry)
8440 std::string EntryFnIDName =
8442 ? std::string(EntryFnName)
8446 EntryFnName, EntryFnIDName);
8454 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
8455 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
8456 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
8458 EntryInfo, EntryAddr, OutlinedFnID,
8460 return OutlinedFnID;
8478 bool IsStandAlone = !BodyGenCB;
8485 MapInfo = &GenMapInfoCB(
Builder.saveIP());
8487 AllocaIP,
Builder.saveIP(), *MapInfo, Info, CustomMapperCB,
8488 true, DeviceAddrCB))
8495 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8505 SrcLocInfo, DeviceID,
8512 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
8516 if (Info.HasNoWait) {
8526 if (Info.HasNoWait) {
8530 emitBlock(OffloadContBlock, CurFn,
true);
8536 bool RequiresOuterTargetTask = Info.HasNoWait;
8537 if (!RequiresOuterTargetTask)
8538 cantFail(TaskBodyCB(
nullptr,
nullptr,
8542 {}, RTArgs, Info.HasNoWait));
8545 omp::OMPRTL___tgt_target_data_begin_mapper);
8549 for (
auto DeviceMap : Info.DevicePtrInfoMap) {
8553 Builder.CreateStore(LI, DeviceMap.second.second);
8590 Value *PointerNum =
Builder.getInt32(Info.NumberOfPtrs);
8599 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
8622 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
8623 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8638 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
8639 return EndThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8642 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
8643 return BeginThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
8654 bool IsGPUDistribute) {
8655 assert((IVSize == 32 || IVSize == 64) &&
8656 "IV size is not compatible with the omp runtime");
8658 if (IsGPUDistribute)
8660 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
8661 : omp::OMPRTL___kmpc_distribute_static_init_4u)
8662 : (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_8
8663 : omp::OMPRTL___kmpc_distribute_static_init_8u);
8665 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
8666 : omp::OMPRTL___kmpc_for_static_init_4u)
8667 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
8668 : omp::OMPRTL___kmpc_for_static_init_8u);
8675 assert((IVSize == 32 || IVSize == 64) &&
8676 "IV size is not compatible with the omp runtime");
8678 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
8679 : omp::OMPRTL___kmpc_dispatch_init_4u)
8680 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_8
8681 : omp::OMPRTL___kmpc_dispatch_init_8u);
8688 assert((IVSize == 32 || IVSize == 64) &&
8689 "IV size is not compatible with the omp runtime");
8691 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
8692 : omp::OMPRTL___kmpc_dispatch_next_4u)
8693 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_8
8694 : omp::OMPRTL___kmpc_dispatch_next_8u);
8701 assert((IVSize == 32 || IVSize == 64) &&
8702 "IV size is not compatible with the omp runtime");
8704 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
8705 : omp::OMPRTL___kmpc_dispatch_fini_4u)
8706 : (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_8
8707 : omp::OMPRTL___kmpc_dispatch_fini_8u);
8718 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
8726 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
8730 if (NewVar && (arg == NewVar->
getArg()))
8740 auto UpdateDebugRecord = [&](
auto *DR) {
8743 for (
auto Loc : DR->location_ops()) {
8744 auto Iter = ValueReplacementMap.find(
Loc);
8745 if (Iter != ValueReplacementMap.end()) {
8746 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
8747 ArgNo = std::get<1>(Iter->second) + 1;
8751 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
8756 if (DVR->getNumVariableLocationOps() != 1u) {
8757 DVR->setKillLocation();
8760 Value *
Loc = DVR->getVariableLocationOp(0u);
8767 RequiredBB = &DVR->getFunction()->getEntryBlock();
8769 if (RequiredBB && RequiredBB != CurBB) {
8781 "Unexpected debug intrinsic");
8783 UpdateDebugRecord(&DVR);
8784 MoveDebugRecordToCorrectBlock(&DVR);
8787 for (
auto *DVR : DVRsToDelete)
8788 DVR->getMarker()->MarkedInstr->dropOneDbgRecord(DVR);
8792 Module *M = Func->getParent();
8795 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
8796 unsigned ArgNo = Func->arg_size();
8798 NewSP,
"dyn_ptr", ArgNo, NewSP->
getFile(), 0, VoidPtrTy,
8799 false, DINode::DIFlags::FlagArtificial);
8801 Argument *LastArg = Func->getArg(Func->arg_size() - 1);
8802 DB.insertDeclare(LastArg, Var, DB.createExpression(),
Loc,
8823 for (
auto &Arg : Inputs)
8824 ParameterTypes.
push_back(Arg->getType()->isPointerTy()
8828 for (
auto &Arg : Inputs)
8829 ParameterTypes.
push_back(Arg->getType());
8837 auto BB = Builder.GetInsertBlock();
8838 auto M = BB->getModule();
8849 if (TargetCpuAttr.isStringAttribute())
8850 Func->addFnAttr(TargetCpuAttr);
8852 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
8853 if (TargetFeaturesAttr.isStringAttribute())
8854 Func->addFnAttr(TargetFeaturesAttr);
8859 OMPBuilder.
emitUsed(
"llvm.compiler.used", {ExecMode});
8870 Builder.SetInsertPoint(EntryBB);
8876 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
8886 splitBB(Builder,
true,
"outlined.body");
8893 Builder.SetInsertPoint(ExitBB);
8900 Builder.CreateRetVoid();
8904 auto AllocaIP = Builder.saveIP();
8909 const auto &ArgRange =
make_range(Func->arg_begin(), Func->arg_end() - 1);
8941 if (Instr->getFunction() == Func)
8942 Instr->replaceUsesOfWith(
Input, InputCopy);
8948 for (
auto InArg :
zip(Inputs, ArgRange)) {
8950 Argument &Arg = std::get<1>(InArg);
8951 Value *InputCopy =
nullptr;
8954 Arg,
Input, InputCopy, AllocaIP, Builder.saveIP(),
8958 Builder.restoreIP(*AfterIP);
8959 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
8979 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
8986 ReplaceValue(
Input, InputCopy, Func);
8990 for (
auto Deferred : DeferredReplacement)
8991 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
8994 ValueReplacementMap);
9002 Value *TaskWithPrivates,
9003 Type *TaskWithPrivatesTy) {
9005 Type *TaskTy = OMPIRBuilder.Task;
9008 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
9009 Value *Shareds = TaskT;
9019 if (TaskWithPrivatesTy != TaskTy)
9020 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
9037 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
9042 assert((!NumOffloadingArrays || PrivatesTy) &&
9043 "PrivatesTy cannot be nullptr when there are offloadingArrays"
9076 Type *TaskPtrTy = OMPBuilder.TaskPtr;
9077 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
9083 ".omp_target_task_proxy_func",
9084 Builder.GetInsertBlock()->getModule());
9085 Value *ThreadId = ProxyFn->getArg(0);
9086 Value *TaskWithPrivates = ProxyFn->getArg(1);
9087 ThreadId->
setName(
"thread.id");
9088 TaskWithPrivates->
setName(
"task");
9090 bool HasShareds = SharedArgsOperandNo > 0;
9091 bool HasOffloadingArrays = NumOffloadingArrays > 0;
9094 Builder.SetInsertPoint(EntryBB);
9100 if (HasOffloadingArrays) {
9101 assert(TaskTy != TaskWithPrivatesTy &&
9102 "If there are offloading arrays to pass to the target"
9103 "TaskTy cannot be the same as TaskWithPrivatesTy");
9106 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
9107 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
9109 Builder.CreateStructGEP(PrivatesTy, Privates, i));
9113 auto *ArgStructAlloca =
9115 assert(ArgStructAlloca &&
9116 "Unable to find the alloca instruction corresponding to arguments "
9117 "for extracted function");
9119 std::optional<TypeSize> ArgAllocSize =
9121 assert(ArgStructType && ArgAllocSize &&
9122 "Unable to determine size of arguments for extracted function");
9123 uint64_t StructSize = ArgAllocSize->getFixedValue();
9126 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
9128 Value *SharedsSize = Builder.getInt64(StructSize);
9131 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
9133 Builder.CreateMemCpy(
9134 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
9136 KernelLaunchArgs.
push_back(NewArgStructAlloca);
9139 Builder.CreateRetVoid();
9145 return GEP->getSourceElementType();
9147 return Alloca->getAllocatedType();
9170 if (OffloadingArraysToPrivatize.
empty())
9171 return OMPIRBuilder.Task;
9174 for (
Value *V : OffloadingArraysToPrivatize) {
9175 assert(V->getType()->isPointerTy() &&
9176 "Expected pointer to array to privatize. Got a non-pointer value "
9179 assert(ArrayTy &&
"ArrayType cannot be nullptr");
9185 "struct.task_with_privates");
9199 EntryFnName, Inputs, CBFunc,
9204 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
9341 TargetTaskAllocaBB->
begin());
9344 auto OI = std::make_unique<OutlineInfo>();
9345 OI->EntryBB = TargetTaskAllocaBB;
9346 OI->OuterAllocBB = AllocaIP.
getBlock();
9351 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
9354 Builder.restoreIP(TargetTaskBodyIP);
9355 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
9373 bool NeedsTargetTask = HasNoWait && DeviceID;
9374 if (NeedsTargetTask) {
9380 OffloadingArraysToPrivatize.
push_back(V);
9381 OI->ExcludeArgsFromAggregate.push_back(V);
9385 OI->PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
9386 DeviceID, OffloadingArraysToPrivatize](
9389 "there must be a single user for the outlined function");
9403 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
9404 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
9406 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
9407 "Wrong number of arguments for StaleCI when shareds are present");
9408 int SharedArgOperandNo =
9409 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
9415 if (!OffloadingArraysToPrivatize.
empty())
9420 *
this,
Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
9421 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
9423 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
9426 Builder.SetInsertPoint(StaleCI);
9443 OMPRTL___kmpc_omp_target_task_alloc);
9455 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
9462 auto *ArgStructAlloca =
9464 assert(ArgStructAlloca &&
9465 "Unable to find the alloca instruction corresponding to arguments "
9466 "for extracted function");
9467 std::optional<TypeSize> ArgAllocSize =
9470 "Unable to determine size of arguments for extracted function");
9471 SharedsSize =
Builder.getInt64(ArgAllocSize->getFixedValue());
9490 TaskSize, SharedsSize,
9493 if (NeedsTargetTask) {
9494 assert(DeviceID &&
"Expected non-empty device ID.");
9504 *
this,
Builder, TaskData, TaskWithPrivatesTy);
9505 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
9508 if (!OffloadingArraysToPrivatize.
empty()) {
9510 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
9511 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
9512 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
9519 "ElementType should match ArrayType");
9522 Value *Dst =
Builder.CreateStructGEP(PrivatesTy, Privates, i);
9524 Dst, Alignment, PtrToPrivatize, Alignment,
9525 Builder.getInt64(
M.getDataLayout().getTypeStoreSize(ElementType)));
9529 Value *DepArray =
nullptr;
9530 Value *NumDeps =
nullptr;
9533 NumDeps = Dependencies.
NumDeps;
9534 }
else if (!Dependencies.
Deps.empty()) {
9536 NumDeps =
Builder.getInt32(Dependencies.
Deps.size());
9547 if (!NeedsTargetTask) {
9556 ConstantInt::get(
Builder.getInt32Ty(), 0),
9569 }
else if (DepArray) {
9577 {Ident, ThreadID, TaskData, NumDeps, DepArray,
9578 ConstantInt::get(
Builder.getInt32Ty(), 0),
9588 I->eraseFromParent();
9593 << *(
Builder.GetInsertBlock()) <<
"\n");
9595 << *(
Builder.GetInsertBlock()->getParent()->getParent())
9607 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
9630 Builder.restoreIP(IP);
9636 return Builder.saveIP();
9639 bool HasDependencies = !Dependencies.
empty();
9640 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
9657 if (OutlinedFnID && DeviceID)
9659 EmitTargetCallFallbackCB, KArgs,
9660 DeviceID, RTLoc, TargetTaskAllocaIP);
9668 return EmitTargetCallFallbackCB(OMPBuilder.
Builder.
saveIP());
9675 auto &&EmitTargetCallElse =
9682 if (RequiresOuterTargetTask) {
9689 Dependencies, EmptyRTArgs, HasNoWait);
9691 return EmitTargetCallFallbackCB(Builder.saveIP());
9694 Builder.restoreIP(AfterIP);
9698 auto &&EmitTargetCallThen =
9702 Info.HasNoWait = HasNoWait;
9707 AllocaIP, Builder.saveIP(), Info, RTArgs, MapInfo, CustomMapperCB,
9713 for (
auto [DefaultVal, RuntimeVal] :
9715 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
9716 : Builder.getInt32(DefaultVal));
9720 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
9722 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
9726 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
9729 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
9737 Value *MaxThreadsClause =
9739 ? InitMaxThreadsClause(RuntimeAttrs.
MaxThreads)
9742 for (
auto [TeamsVal, TargetVal] :
zip_equal(
9744 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
9745 Value *NumThreads = InitMaxThreadsClause(TargetVal);
9747 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
9748 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
9750 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
9753 unsigned NumTargetItems = Info.NumberOfPtrs;
9761 Builder.getInt64Ty(),
9763 : Builder.getInt64(0);
9767 DynCGroupMem = Builder.getInt32(0);
9770 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
9771 HasNoWait, DynCGroupMemFallback);
9778 if (RequiresOuterTargetTask)
9780 RTLoc, AllocaIP, Dependencies,
9781 KArgs.
RTArgs, Info.HasNoWait);
9784 Builder, OutlinedFnID, EmitTargetCallFallbackCB, KArgs,
9785 RuntimeAttrs.
DeviceID, RTLoc, AllocaIP);
9788 Builder.restoreIP(AfterIP);
9795 if (!OutlinedFnID) {
9796 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP(), DeallocBlocks));
9802 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP(), DeallocBlocks));
9807 EmitTargetCallElse, AllocaIP));
9820 bool HasNowait,
Value *DynCGroupMem,
9834 *
this,
Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
9835 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
9841 if (!
Config.isTargetDevice())
9843 RuntimeAttrs, IfCond, OutlinedFn, OutlinedFnID, Inputs,
9844 GenMapInfoCB, CustomMapperCB, Dependencies, HasNowait,
9845 DynCGroupMem, DynCGroupMemFallback);
9859 return OS.
str().str();
9864 return OpenMPIRBuilder::getNameWithSeparators(Parts,
Config.firstSeparator(),
9870 auto &Elem = *
InternalVars.try_emplace(Name,
nullptr).first;
9872 assert(Elem.second->getValueType() == Ty &&
9873 "OMP internal variable has different type than requested");
9886 :
M.getTargetTriple().isAMDGPU()
9888 :
DL.getDefaultGlobalsAddressSpace();
9897 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
9898 GV->setAlignment(std::max(TypeAlign, PtrAlign));
9905Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
9906 std::string Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
9907 std::string Name = getNameWithSeparators({Prefix,
"var"},
".",
".");
9918 return SizePtrToInt;
9923 std::string VarName) {
9931 return MaptypesArrayGlobal;
9936 unsigned NumOperands,
9945 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
9949 ArrI64Ty,
nullptr,
".offload_sizes");
9960 int64_t DeviceID,
unsigned NumOperands) {
9966 Value *ArgsBaseGEP =
9968 {Builder.getInt32(0), Builder.getInt32(0)});
9971 {Builder.getInt32(0), Builder.getInt32(0)});
9972 Value *ArgSizesGEP =
9974 {Builder.getInt32(0), Builder.getInt32(0)});
9978 Builder.getInt32(NumOperands),
9979 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
9980 MaptypesArg, MapnamesArg, NullPtr});
9987 assert((!ForEndCall || Info.separateBeginEndCalls()) &&
9988 "expected region end call to runtime only when end call is separate");
9990 auto VoidPtrTy = UnqualPtrTy;
9991 auto VoidPtrPtrTy = UnqualPtrTy;
9993 auto Int64PtrTy = UnqualPtrTy;
9995 if (!Info.NumberOfPtrs) {
10007 Info.RTArgs.BasePointersArray,
10010 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray,
10014 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10018 ForEndCall && Info.RTArgs.MapTypesArrayEnd ? Info.RTArgs.MapTypesArrayEnd
10019 : Info.RTArgs.MapTypesArray,
10025 if (!Info.EmitDebug)
10029 ArrayType::get(VoidPtrTy, Info.NumberOfPtrs), Info.RTArgs.MapNamesArray,
10034 if (!Info.HasMapper)
10038 Builder.CreatePointerCast(Info.RTArgs.MappersArray, VoidPtrPtrTy);
10059 "struct.descriptor_dim");
10061 enum { OffsetFD = 0, CountFD, StrideFD };
10065 for (
unsigned I = 0, L = 0, E = NonContigInfo.
Dims.
size();
I < E; ++
I) {
10068 if (NonContigInfo.
Dims[
I] == 1)
10073 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
10074 Builder.restoreIP(CodeGenIP);
10075 for (
unsigned II = 0, EE = NonContigInfo.
Dims[
I];
II < EE; ++
II) {
10076 unsigned RevIdx = EE -
II - 1;
10080 Value *OffsetLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
10082 NonContigInfo.
Offsets[L][RevIdx], OffsetLVal,
10083 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
10085 Value *CountLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
10087 NonContigInfo.
Counts[L][RevIdx], CountLVal,
10088 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10090 Value *StrideLVal =
Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
10092 NonContigInfo.
Strides[L][RevIdx], StrideLVal,
10093 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
10096 Builder.restoreIP(CodeGenIP);
10097 Value *DAddr =
Builder.CreatePointerBitCastOrAddrSpaceCast(
10098 DimsAddr,
Builder.getPtrTy());
10101 Info.RTArgs.PointersArray, 0,
I);
10103 DAddr,
P,
M.getDataLayout().getPrefTypeAlign(
Builder.getPtrTy()));
10108void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
10112 StringRef Prefix = IsInit ?
".init" :
".del";
10118 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
10119 Value *DeleteBit = Builder.CreateAnd(
10122 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10123 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
10128 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
10129 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
10130 DeleteCond = Builder.CreateIsNull(
10135 DeleteCond =
Builder.CreateIsNotNull(
10151 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10152 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10153 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10154 MapTypeArg =
Builder.CreateOr(
10157 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10158 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
10162 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
10163 ArraySize, MapTypeArg, MapName};
10189 MapperFn->
addFnAttr(Attribute::NoInline);
10190 MapperFn->
addFnAttr(Attribute::NoUnwind);
10200 auto SavedIP =
Builder.saveIP();
10201 Builder.SetInsertPoint(EntryBB);
10213 TypeSize ElementSize =
M.getDataLayout().getTypeStoreSize(ElemTy);
10215 Value *PtrBegin = BeginIn;
10221 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10222 MapType, MapName, ElementSize, HeadBB,
10233 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
10234 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
10240 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
10241 PtrPHI->addIncoming(PtrBegin, HeadBB);
10246 return Info.takeError();
10250 Value *OffloadingArgs[] = {MapperHandle};
10254 Value *ShiftedPreviousSize =
10258 for (
unsigned I = 0;
I < Info->BasePointers.size(); ++
I) {
10259 Value *CurBaseArg = Info->BasePointers[
I];
10260 Value *CurBeginArg = Info->Pointers[
I];
10261 Value *CurSizeArg = Info->Sizes[
I];
10262 Value *CurNameArg = Info->Names.size()
10268 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10270 Value *MemberMapType =
10271 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
10288 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10289 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10290 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10300 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
10306 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10307 OpenMPOffloadMappingFlags::OMP_MAP_TO |
10308 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10314 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10315 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10316 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
10322 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10323 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10329 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10330 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
10331 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
10337 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10338 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
10347 CurMapType->
addIncoming(MemberMapType, ToElseBB);
10349 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
10350 CurSizeArg, CurMapType, CurNameArg};
10352 auto ChildMapperFn = CustomMapperCB(
I);
10353 if (!ChildMapperFn)
10354 return ChildMapperFn.takeError();
10355 if (*ChildMapperFn) {
10370 Value *PtrNext =
Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
10371 "omp.arraymap.next");
10372 PtrPHI->addIncoming(PtrNext, LastBB);
10373 Value *IsDone =
Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
10375 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
10380 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
10381 MapType, MapName, ElementSize, DoneBB,
10395 bool IsNonContiguous,
10399 Info.clearArrayInfo();
10402 if (Info.NumberOfPtrs == 0)
10411 Info.RTArgs.BasePointersArray =
Builder.CreateAlloca(
10412 PointerArrayType,
nullptr,
".offload_baseptrs");
10414 Info.RTArgs.PointersArray =
Builder.CreateAlloca(
10415 PointerArrayType,
nullptr,
".offload_ptrs");
10417 PointerArrayType,
nullptr,
".offload_mappers");
10418 Info.RTArgs.MappersArray = MappersArray;
10425 ConstantInt::get(Int64Ty, 0));
10427 for (
unsigned I = 0, E = CombinedInfo.
Sizes.
size();
I < E; ++
I) {
10428 bool IsNonContigEntry =
10430 (
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10432 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG) != 0);
10435 if (IsNonContigEntry) {
10437 "Index must be in-bounds for NON_CONTIG Dims array");
10439 assert(DimCount > 0 &&
"NON_CONTIG DimCount must be > 0");
10440 ConstSizes[
I] = ConstantInt::get(Int64Ty, DimCount);
10445 ConstSizes[
I] = CI;
10449 RuntimeSizes.
set(
I);
10452 if (RuntimeSizes.
all()) {
10454 Info.RTArgs.SizesArray =
Builder.CreateAlloca(
10455 SizeArrayType,
nullptr,
".offload_sizes");
10461 auto *SizesArrayGbl =
10466 if (!RuntimeSizes.
any()) {
10467 Info.RTArgs.SizesArray = SizesArrayGbl;
10469 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10470 Align OffloadSizeAlign =
M.getDataLayout().getABIIntegerTypeAlignment(64);
10473 SizeArrayType,
nullptr,
".offload_sizes");
10477 Buffer,
M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
10478 SizesArrayGbl, OffloadSizeAlign,
10483 Info.RTArgs.SizesArray = Buffer;
10491 for (
auto mapFlag : CombinedInfo.
Types)
10493 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10497 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
10503 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
10504 Info.EmitDebug =
true;
10506 Info.RTArgs.MapNamesArray =
10508 Info.EmitDebug =
false;
10513 if (Info.separateBeginEndCalls()) {
10514 bool EndMapTypesDiffer =
false;
10516 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
10517 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
10518 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
10519 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
10520 EndMapTypesDiffer =
true;
10523 if (EndMapTypesDiffer) {
10525 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
10530 for (
unsigned I = 0;
I < Info.NumberOfPtrs; ++
I) {
10533 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.BasePointersArray,
10535 Builder.CreateAlignedStore(BPVal, BP,
10536 M.getDataLayout().getPrefTypeAlign(PtrTy));
10538 if (Info.requiresDevicePointerInfo()) {
10540 CodeGenIP =
Builder.saveIP();
10542 Info.DevicePtrInfoMap[BPVal] = {BP,
Builder.CreateAlloca(PtrTy)};
10543 Builder.restoreIP(CodeGenIP);
10545 DeviceAddrCB(
I, Info.DevicePtrInfoMap[BPVal].second);
10547 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
10549 DeviceAddrCB(
I, BP);
10555 ArrayType::get(PtrTy, Info.NumberOfPtrs), Info.RTArgs.PointersArray, 0,
10558 Builder.CreateAlignedStore(PVal,
P,
10559 M.getDataLayout().getPrefTypeAlign(PtrTy));
10561 if (RuntimeSizes.
test(
I)) {
10563 ArrayType::get(Int64Ty, Info.NumberOfPtrs), Info.RTArgs.SizesArray,
10569 S,
M.getDataLayout().getPrefTypeAlign(PtrTy));
10572 unsigned IndexSize =
M.getDataLayout().getIndexSizeInBits(0);
10575 auto CustomMFunc = CustomMapperCB(
I);
10577 return CustomMFunc.takeError();
10579 MFunc =
Builder.CreatePointerCast(*CustomMFunc, PtrTy);
10582 PointerArrayType, MappersArray,
10585 MFunc, MAddr,
M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
10589 Info.NumberOfPtrs == 0)
10606 Builder.ClearInsertionPoint();
10637 auto CondConstant = CI->getSExtValue();
10639 return ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10641 return ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks);
10651 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
10654 if (
Error Err = ThenGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10660 if (
Error Err = ElseGen(AllocaIP,
Builder.saveIP(), DeallocBlocks))
10669bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
10673 "Unexpected Atomic Ordering.");
10675 bool Flush =
false;
10737 assert(
X.Var->getType()->isPointerTy() &&
10738 "OMP Atomic expects a pointer to target memory");
10739 Type *XElemTy =
X.ElemTy;
10742 "OMP atomic read expected a scalar type");
10744 Value *XRead =
nullptr;
10748 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
10757 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10760 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10762 XRead = AtomicLoadRes.first;
10769 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
10772 XRead =
Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
10774 XRead =
Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
10777 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
10778 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
10789 assert(
X.Var->getType()->isPointerTy() &&
10790 "OMP Atomic expects a pointer to target memory");
10791 Type *XElemTy =
X.ElemTy;
10794 "OMP atomic write expected a scalar type");
10802 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
10805 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
10813 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
10818 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
10825 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
10826 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10832 Type *XTy =
X.Var->getType();
10834 "OMP Atomic expects a pointer to target memory");
10835 Type *XElemTy =
X.ElemTy;
10838 "OMP atomic update expected a scalar or struct type");
10841 "OpenMP atomic does not support LT or GT operations");
10845 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
10846 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10848 return AtomicResult.takeError();
10849 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
10854Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
10858 return Builder.CreateAdd(Src1, Src2);
10860 return Builder.CreateSub(Src1, Src2);
10862 return Builder.CreateAnd(Src1, Src2);
10864 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
10866 return Builder.CreateOr(Src1, Src2);
10868 return Builder.CreateXor(Src1, Src2);
10892Expected<std::pair<Value *, Value *>> OpenMPIRBuilder::emitAtomicUpdate(
10895 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
10896 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
10898 bool emitRMWOp =
false;
10906 emitRMWOp = XElemTy;
10909 emitRMWOp = (IsXBinopExpr && XElemTy);
10916 std::pair<Value *, Value *> Res;
10918 AtomicRMWInst *RMWInst =
10919 Builder.CreateAtomicRMW(RMWOp,
X, Expr, llvm::MaybeAlign(), AO);
10920 if (
T.isAMDGPU()) {
10921 if (IsIgnoreDenormalMode)
10922 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
10924 if (!IsFineGrainedMemory)
10925 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
10927 if (!IsRemoteMemory)
10931 Res.first = RMWInst;
10936 Res.second = Res.first;
10938 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
10941 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
10946 OpenMPIRBuilder::AtomicInfo atomicInfo(
10948 OldVal->
getAlign(),
true , AllocaIP,
X);
10949 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
10952 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
10959 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
10960 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
10961 Builder.SetInsertPoint(ContBB);
10963 PHI->addIncoming(AtomicLoadRes.first, CurBB);
10965 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
10968 Value *Upd = *CBResult;
10969 Builder.CreateStore(Upd, NewAtomicAddr);
10972 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
10973 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
10974 LoadInst *PHILoad =
Builder.CreateLoad(XElemTy,
Result.first);
10975 PHI->addIncoming(PHILoad,
Builder.GetInsertBlock());
10978 Res.first = OldExprVal;
10981 if (UnreachableInst *ExitTI =
10984 Builder.SetInsertPoint(ExitBB);
10986 Builder.SetInsertPoint(ExitTI);
10989 IntegerType *IntCastTy =
10992 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
11001 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11008 AllocaInst *NewAtomicAddr =
Builder.CreateAlloca(XElemTy);
11009 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
11010 Builder.SetInsertPoint(ContBB);
11012 PHI->addIncoming(OldVal, CurBB);
11017 OldExprVal =
Builder.CreateBitCast(
PHI, XElemTy,
11018 X->getName() +
".atomic.fltCast");
11020 OldExprVal =
Builder.CreateIntToPtr(
PHI, XElemTy,
11021 X->getName() +
".atomic.ptrCast");
11025 Expected<Value *> CBResult = UpdateOp(OldExprVal,
Builder);
11028 Value *Upd = *CBResult;
11029 Builder.CreateStore(Upd, NewAtomicAddr);
11030 LoadInst *DesiredVal =
Builder.CreateLoad(IntCastTy, NewAtomicAddr);
11034 X,
PHI, DesiredVal, llvm::MaybeAlign(), AO, Failure);
11035 Result->setVolatile(VolatileX);
11036 Value *PreviousVal =
Builder.CreateExtractValue(Result, 0);
11037 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
11038 PHI->addIncoming(PreviousVal,
Builder.GetInsertBlock());
11039 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
11041 Res.first = OldExprVal;
11045 if (UnreachableInst *ExitTI =
11048 Builder.SetInsertPoint(ExitBB);
11050 Builder.SetInsertPoint(ExitTI);
11061 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
11062 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
11067 Type *XTy =
X.Var->getType();
11069 "OMP Atomic expects a pointer to target memory");
11070 Type *XElemTy =
X.ElemTy;
11073 "OMP atomic capture expected a scalar or struct type");
11075 "OpenMP atomic does not support LT or GT operations");
11082 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
11083 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
11086 Value *CapturedVal =
11087 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
11088 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
11090 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
11102 IsPostfixUpdate, IsFailOnly, Failure);
11114 assert(
X.Var->getType()->isPointerTy() &&
11115 "OMP atomic expects a pointer to target memory");
11118 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
11119 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
11122 bool IsInteger = E->getType()->isIntegerTy();
11124 if (
Op == OMPAtomicCompareOp::EQ) {
11139 Value *OldValue =
Builder.CreateExtractValue(Result, 0);
11141 OldValue =
Builder.CreateBitCast(OldValue,
X.ElemTy);
11143 "OldValue and V must be of same type");
11144 if (IsPostfixUpdate) {
11145 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
11147 Value *SuccessOrFail =
Builder.CreateExtractValue(Result, 1);
11160 CurBBTI = CurBBTI ? CurBBTI :
Builder.CreateUnreachable();
11162 CurBBTI,
X.Var->getName() +
".atomic.exit");
11168 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
11170 Builder.SetInsertPoint(ContBB);
11171 Builder.CreateStore(OldValue, V.Var);
11177 Builder.SetInsertPoint(ExitBB);
11179 Builder.SetInsertPoint(ExitTI);
11182 Value *CapturedValue =
11183 Builder.CreateSelect(SuccessOrFail, E, OldValue);
11184 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11190 assert(R.Var->getType()->isPointerTy() &&
11191 "r.var must be of pointer type");
11192 assert(R.ElemTy->isIntegerTy() &&
"r must be of integral type");
11194 Value *SuccessFailureVal =
Builder.CreateExtractValue(Result, 1);
11195 Value *ResultCast = R.IsSigned
11196 ?
Builder.CreateSExt(SuccessFailureVal, R.ElemTy)
11197 :
Builder.CreateZExt(SuccessFailureVal, R.ElemTy);
11198 Builder.CreateStore(ResultCast, R.Var, R.IsVolatile);
11201 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
11202 "Op should be either max or min at this point");
11203 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
11214 if (IsXBinopExpr) {
11243 Value *CapturedValue =
nullptr;
11244 if (IsPostfixUpdate) {
11245 CapturedValue = OldValue;
11270 Value *NonAtomicCmp =
Builder.CreateCmp(Pred, OldValue, E);
11271 CapturedValue =
Builder.CreateSelect(NonAtomicCmp, E, OldValue);
11273 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
11277 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
11297 if (&OuterAllocaBB ==
Builder.GetInsertBlock()) {
11324 bool SubClausesPresent =
11325 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
11327 if (!
Config.isTargetDevice() && SubClausesPresent) {
11328 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
11329 "if lowerbound is non-null, then upperbound must also be non-null "
11330 "for bounds on num_teams");
11332 if (NumTeamsUpper ==
nullptr)
11333 NumTeamsUpper =
Builder.getInt32(0);
11335 if (NumTeamsLower ==
nullptr)
11336 NumTeamsLower = NumTeamsUpper;
11340 "argument to if clause must be an integer value");
11344 IfExpr =
Builder.CreateICmpNE(IfExpr,
11345 ConstantInt::get(IfExpr->
getType(), 0));
11346 NumTeamsUpper =
Builder.CreateSelect(
11347 IfExpr, NumTeamsUpper,
Builder.getInt32(1),
"numTeamsUpper");
11350 NumTeamsLower =
Builder.CreateSelect(
11351 IfExpr, NumTeamsLower,
Builder.getInt32(1),
"numTeamsLower");
11354 if (ThreadLimit ==
nullptr)
11355 ThreadLimit =
Builder.getInt32(0);
11359 Value *NumTeamsLowerInt32 =
11361 Value *NumTeamsUpperInt32 =
11363 Value *ThreadLimitInt32 =
11370 {Ident, ThreadNum, NumTeamsLowerInt32, NumTeamsUpperInt32,
11371 ThreadLimitInt32});
11376 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11379 auto OI = std::make_unique<OutlineInfo>();
11380 OI->EntryBB = AllocaBB;
11381 OI->ExitBB = ExitBB;
11382 OI->OuterAllocBB = &OuterAllocaBB;
11388 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
11390 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
11392 auto HostPostOutlineCB = [
this, Ident,
11393 ToBeDeleted](
Function &OutlinedFn)
mutable {
11398 "there must be a single user for the outlined function");
11403 "Outlined function must have two or three arguments only");
11405 bool HasShared = OutlinedFn.
arg_size() == 3;
11413 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
11414 "outlined function.");
11415 Builder.SetInsertPoint(StaleCI);
11422 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
11426 I->eraseFromParent();
11429 if (!
Config.isTargetDevice())
11430 OI->PostOutlineCB = HostPostOutlineCB;
11434 Builder.SetInsertPoint(ExitBB);
11447 if (OuterAllocaBB ==
Builder.GetInsertBlock()) {
11462 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP, ExitBB))
11467 if (
Config.isTargetDevice()) {
11468 auto OI = std::make_unique<OutlineInfo>();
11469 OI->OuterAllocBB = OuterAllocIP.
getBlock();
11470 OI->EntryBB = AllocaBB;
11471 OI->ExitBB = ExitBB;
11472 OI->OuterDeallocBBs.reserve(OuterDeallocBlocks.
size());
11473 copy(OuterDeallocBlocks, OI->OuterDeallocBBs.
end());
11477 Builder.SetInsertPoint(ExitBB);
11484 std::string VarName) {
11493 return MapNamesArrayGlobal;
11498void OpenMPIRBuilder::initializeTypes(
Module &M) {
11502 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
11503#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
11504#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
11505 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
11506 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
11507#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
11508 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
11509 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
11510#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
11511 T = StructType::getTypeByName(Ctx, StructName); \
11513 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
11515 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
11516#include "llvm/Frontend/OpenMP/OMPKinds.def"
11527 while (!Worklist.
empty()) {
11531 if (
BlockSet.insert(SuccBB).second)
11536std::unique_ptr<CodeExtractor>
11538 bool ArgsInZeroAddressSpace,
11540 return std::make_unique<CodeExtractor>(
11550 Suffix.
str(), ArgsInZeroAddressSpace);
11553std::unique_ptr<CodeExtractor> DeviceSharedMemOutlineInfo::createCodeExtractor(
11555 return std::make_unique<DeviceSharedMemCodeExtractor>(
11556 OMPBuilder, Blocks,
nullptr,
11564 OuterDeallocBBs.empty()
11567 Suffix.
str(), ArgsInZeroAddressSpace);
11577 Name.empty() ? Addr->
getName() : Name,
Size, Flags, 0);
11589 Fn->
addFnAttr(
"uniform-work-group-size");
11590 Fn->
addFnAttr(Attribute::MustProgress);
11608 auto &&GetMDInt = [
this](
unsigned V) {
11615 NamedMDNode *MD =
M.getOrInsertNamedMetadata(
"omp_offload.info");
11616 auto &&TargetRegionMetadataEmitter =
11617 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
11632 GetMDInt(E.getKind()), GetMDInt(EntryInfo.DeviceID),
11633 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
11634 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
11635 GetMDInt(E.getOrder())};
11638 OrderedEntries[E.getOrder()] = std::make_pair(&E, EntryInfo);
11647 auto &&DeviceGlobalVarMetadataEmitter =
11648 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
11658 Metadata *
Ops[] = {GetMDInt(E.getKind()), GetMDString(MangledName),
11659 GetMDInt(E.getFlags()), GetMDInt(E.getOrder())};
11663 OrderedEntries[E.getOrder()] = std::make_pair(&E, varInfo);
11670 DeviceGlobalVarMetadataEmitter);
11672 for (
const auto &E : OrderedEntries) {
11673 assert(E.first &&
"All ordered entries must exist!");
11674 if (
const auto *CE =
11677 if (!CE->getID() || !CE->getAddress()) {
11681 if (!
M.getNamedValue(FnName))
11689 }
else if (
const auto *CE =
dyn_cast<
11698 if (
Config.isTargetDevice() &&
Config.hasRequiresUnifiedSharedMemory())
11700 if (!CE->getAddress()) {
11705 if (CE->getVarSize() == 0)
11709 assert(((
Config.isTargetDevice() && !CE->getAddress()) ||
11710 (!
Config.isTargetDevice() && CE->getAddress())) &&
11711 "Declaret target link address is set.");
11712 if (
Config.isTargetDevice())
11714 if (!CE->getAddress()) {
11721 if (!CE->getAddress()) {
11734 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
11738 OMPTargetGlobalVarEntryIndirectVTable))
11747 Flags, CE->getLinkage(), CE->getVarName());
11750 Flags, CE->getLinkage());
11761 if (
Config.hasRequiresFlags() && !
Config.isTargetDevice())
11767 Config.getRequiresFlags());
11777 OS <<
"_" <<
Count;
11782 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
11785 EntryInfo.
Line, NewCount);
11793 auto FileIDInfo = CallBack();
11797 FileID =
Status->getUniqueID().getFile();
11801 FileID =
hash_value(std::get<0>(FileIDInfo));
11805 std::get<1>(FileIDInfo));
11811 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11813 !(Remain & 1); Remain = Remain >> 1)
11831 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11833 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11840 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
11846 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
11847 Flags |= MemberOfFlag;
11853 bool IsDeclaration,
bool IsExternallyVisible,
11855 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11856 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
11857 std::function<
Constant *()> GlobalInitializer,
11868 Config.hasRequiresUnifiedSharedMemory())) {
11873 if (!IsExternallyVisible)
11875 OS <<
"_decl_tgt_ref_ptr";
11878 Value *Ptr =
M.getNamedValue(PtrName);
11887 if (!
Config.isTargetDevice()) {
11888 if (GlobalInitializer)
11889 GV->setInitializer(GlobalInitializer());
11895 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11896 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11897 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
11909 bool IsDeclaration,
bool IsExternallyVisible,
11911 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
11912 std::vector<Triple> TargetTriple,
11913 std::function<
Constant *()> GlobalInitializer,
11917 (TargetTriple.empty() && !
Config.isTargetDevice()))
11928 !
Config.hasRequiresUnifiedSharedMemory()) {
11930 VarName = MangledName;
11933 if (!IsDeclaration)
11935 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
11938 Linkage = (VariableLinkage) ? VariableLinkage() : LlvmVal->
getLinkage();
11942 if (
Config.isTargetDevice() &&
11951 if (!
M.getNamedValue(RefName)) {
11955 GvAddrRef->setConstant(
true);
11957 GvAddrRef->setInitializer(Addr);
11958 GeneratedRefs.push_back(GvAddrRef);
11967 if (
Config.isTargetDevice()) {
11968 VarName = (Addr) ? Addr->
getName() :
"";
11972 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
11973 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
11974 LlvmPtrTy, GlobalInitializer, VariableLinkage);
11975 VarName = (Addr) ? Addr->
getName() :
"";
11977 VarSize =
M.getDataLayout().getPointerSize();
11996 auto &&GetMDInt = [MN](
unsigned Idx) {
12001 auto &&GetMDString = [MN](
unsigned Idx) {
12003 return V->getString();
12006 switch (GetMDInt(0)) {
12010 case OffloadEntriesInfoManager::OffloadEntryInfo::
12011 OffloadingEntryInfoTargetRegion: {
12021 case OffloadEntriesInfoManager::OffloadEntryInfo::
12022 OffloadingEntryInfoDeviceGlobalVar:
12035 if (HostFilePath.
empty())
12039 if (std::error_code Err = Buf.getError()) {
12041 "OpenMPIRBuilder: " +
12049 if (std::error_code Err =
M.getError()) {
12051 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
12065 "expected a valid insertion block for creating an iterator loop");
12075 Builder.getCurrentDebugLocation(),
"omp.it.cont");
12087 T->eraseFromParent();
12096 if (!BodyBr || BodyBr->getSuccessor() != CLI->
getLatch()) {
12098 "iterator bodygen must terminate the canonical body with an "
12099 "unconditional branch to the loop latch",
12123 for (
const auto &
ParamAttr : ParamAttrs) {
12166 return std::string(Out.
str());
12174 unsigned VecRegSize;
12176 ISADataTy ISAData[] = {
12195 for (
char Mask :
Masked) {
12196 for (
const ISADataTy &
Data : ISAData) {
12199 Out <<
"_ZGV" <<
Data.ISA << Mask;
12201 assert(NumElts &&
"Non-zero simdlen/cdtsize expected");
12215template <
typename T>
12218 StringRef MangledName,
bool OutputBecomesInput,
12222 Out << Prefix << ISA << LMask << VLEN;
12223 if (OutputBecomesInput)
12225 Out << ParSeq <<
'_' << MangledName;
12234 bool OutputBecomesInput,
12239 OutputBecomesInput, Fn);
12241 OutputBecomesInput, Fn);
12245 OutputBecomesInput, Fn);
12247 OutputBecomesInput, Fn);
12251 OutputBecomesInput, Fn);
12253 OutputBecomesInput, Fn);
12258 OutputBecomesInput, Fn);
12269 char ISA,
unsigned NarrowestDataSize,
bool OutputBecomesInput) {
12270 assert((ISA ==
'n' || ISA ==
's') &&
"Expected ISA either 's' or 'n'.");
12282 OutputBecomesInput, Fn);
12289 OutputBecomesInput, Fn);
12291 OutputBecomesInput, Fn);
12295 OutputBecomesInput, Fn);
12299 OutputBecomesInput, Fn);
12308 OutputBecomesInput, Fn);
12315 MangledName, OutputBecomesInput, Fn);
12317 MangledName, OutputBecomesInput, Fn);
12321 MangledName, OutputBecomesInput, Fn);
12325 MangledName, OutputBecomesInput, Fn);
12335 return OffloadEntriesTargetRegion.empty() &&
12336 OffloadEntriesDeviceGlobalVar.empty();
12339unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
12341 auto It = OffloadEntriesTargetRegionCount.find(
12342 getTargetRegionEntryCountKey(EntryInfo));
12343 if (It == OffloadEntriesTargetRegionCount.end())
12348void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
12350 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
12351 EntryInfo.
Count + 1;
12357 OffloadEntriesTargetRegion[EntryInfo] =
12360 ++OffloadingEntriesNum;
12366 assert(EntryInfo.
Count == 0 &&
"expected default EntryInfo");
12369 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12373 if (OMPBuilder->Config.isTargetDevice()) {
12378 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
12379 Entry.setAddress(Addr);
12381 Entry.setFlags(Flags);
12387 "Target region entry already registered!");
12389 OffloadEntriesTargetRegion[EntryInfo] = Entry;
12390 ++OffloadingEntriesNum;
12392 incrementTargetRegionEntryInfoCount(EntryInfo);
12399 EntryInfo.
Count = getTargetRegionEntryInfoCount(EntryInfo);
12401 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
12402 if (It == OffloadEntriesTargetRegion.end()) {
12406 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
12414 for (
const auto &It : OffloadEntriesTargetRegion) {
12415 Action(It.first, It.second);
12421 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
12422 ++OffloadingEntriesNum;
12428 if (OMPBuilder->Config.isTargetDevice()) {
12432 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12434 if (Entry.getVarSize() == 0) {
12435 Entry.setVarSize(VarSize);
12436 Entry.setLinkage(Linkage);
12440 Entry.setVarSize(VarSize);
12441 Entry.setLinkage(Linkage);
12442 Entry.setAddress(Addr);
12445 auto &Entry = OffloadEntriesDeviceGlobalVar[VarName];
12446 assert(Entry.isValid() && Entry.getFlags() == Flags &&
12447 "Entry not initialized!");
12448 if (Entry.getVarSize() == 0) {
12449 Entry.setVarSize(VarSize);
12450 Entry.setLinkage(Linkage);
12457 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
12458 Addr, VarSize, Flags, Linkage,
12461 OffloadEntriesDeviceGlobalVar.try_emplace(
12462 VarName, OffloadingEntriesNum, Addr, VarSize, Flags, Linkage,
"");
12463 ++OffloadingEntriesNum;
12470 for (
const auto &E : OffloadEntriesDeviceGlobalVar)
12471 Action(E.getKey(), E.getValue());
12478void CanonicalLoopInfo::collectControlBlocks(
12485 BBs.
append({getPreheader(), Header,
Cond, Latch, Exit, getAfter()});
12497void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
12509void CanonicalLoopInfo::mapIndVar(
12519 for (
Use &U : OldIV->
uses()) {
12523 if (
User->getParent() == getCond())
12525 if (
User->getParent() == getLatch())
12531 Value *NewIV = Updater(OldIV);
12534 for (Use *U : ReplacableUses)
12555 "Preheader must terminate with unconditional branch");
12557 "Preheader must jump to header");
12561 "Header must terminate with unconditional branch");
12562 assert(Header->getSingleSuccessor() == Cond &&
12563 "Header must jump to exiting block");
12566 assert(Cond->getSinglePredecessor() == Header &&
12567 "Exiting block only reachable from header");
12570 "Exiting block must terminate with conditional branch");
12572 "Exiting block's first successor jump to the body");
12574 "Exiting block's second successor must exit the loop");
12578 "Body only reachable from exiting block");
12583 "Latch must terminate with unconditional branch");
12584 assert(Latch->getSingleSuccessor() == Header &&
"Latch must jump to header");
12587 assert(Latch->getSinglePredecessor() !=
nullptr);
12592 "Exit block must terminate with unconditional branch");
12593 assert(Exit->getSingleSuccessor() == After &&
12594 "Exit block must jump to after block");
12598 "After block only reachable from exit block");
12602 assert(IndVar &&
"Canonical induction variable not found?");
12604 "Induction variable must be an integer");
12606 "Induction variable must be a PHI in the loop header");
12612 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
12620 assert(TripCount &&
"Loop trip count not found?");
12622 "Trip count and induction variable must have the same type");
12626 "Exit condition must be a signed less-than comparison");
12628 "Exit condition must compare the induction variable");
12630 "Exit condition must compare with the trip count");
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true, bool Is64Bit=false)
static Function * createTargetParallelWrapper(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn)
Create wrapper function used to gather the outlined function's argument structure from a shared buffe...
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static std::string mangleVectorParameters(ArrayRef< llvm::OpenMPIRBuilder::DeclareSimdAttrTy > ParamAttrs)
Mangle the parameter part of the vector function name according to their OpenMP classification.
static bool isGenericKernel(Function &Fn)
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static std::optional< omp::OMPTgtExecModeFlags > getTargetKernelExecMode(Function &Kernel)
Given a function, if it represents the entry point of a target kernel, this returns the execution mod...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const OpenMPIRBuilder::DependenciesInfo &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static void hoistNonEntryAllocasToEntryBlock(llvm::BasicBlock &Block)
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static bool hasGridValue(const Triple &T)
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask, StringRef Prefix, char ISA, StringRef ParSeq, StringRef MangledName, bool OutputBecomesInput, llvm::Function *Fn)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static SymbolRef::Type getType(const Symbol *Sym)
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
An arbitrary precision integer that knows its signedness.
static APSInt getUnsigned(uint64_t X)
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
LLVM_ABI bool isArrayAllocation() const
Return true if there is an allocation size parameter to the allocation instruction that is not 1.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
bool registerPass(PassBuilderT &&PassBuilder)
Register an analysis pass with the manager.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
bool empty() const
Check if the array is empty.
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
LLVM_ABI std::pair< LoadInst *, AllocaInst * > EmitAtomicLoadLibcall(AtomicOrdering AO)
LLVM_ABI void EmitAtomicStoreLibcall(AtomicOrdering AO, Value *Source)
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
static LLVM_ABI Attribute getWithAlignment(LLVMContext &Context, Align Alignment)
Return a uniquified Attribute object that has the specific alignment set.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
bool hasTerminator() const LLVM_READONLY
Returns whether the block has a terminator.
const Instruction & back() const
LLVM_ABI BasicBlock * splitBasicBlockBefore(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction and insert the new basic blo...
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
LLVM_ABI void insertDbgRecordBefore(DbgRecord *DR, InstListType::iterator Here)
Insert a DbgRecord into a block at the position given by Here.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
const Instruction * getTerminatorOrNull() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Class to represented the control flow structure of an OpenMP canonical loop.
Value * getTripCount() const
Returns the llvm::Value containing the number of loop iterations.
BasicBlock * getHeader() const
The header is the entry for each iteration.
LLVM_ABI void assertOK() const
Consistency self-check.
Type * getIndVarType() const
Return the type of the induction variable (and the trip count).
BasicBlock * getBody() const
The body block is the single entry for a loop iteration and not controlled by CanonicalLoopInfo.
bool isValid() const
Returns whether this object currently represents the IR of a loop.
void setLastIter(Value *IterVar)
Sets the last iteration variable for this loop.
OpenMPIRBuilder::InsertPointTy getAfterIP() const
Return the insertion point for user code after the loop.
OpenMPIRBuilder::InsertPointTy getBodyIP() const
Return the insertion point for user code in the body.
BasicBlock * getAfter() const
The after block is intended for clean-up code such as lifetime end markers.
Function * getFunction() const
LLVM_ABI void invalidate()
Invalidate this loop.
BasicBlock * getLatch() const
Reaching the latch indicates the end of the loop body code.
OpenMPIRBuilder::InsertPointTy getPreheaderIP() const
Return the insertion point for user code before the loop.
BasicBlock * getCond() const
The condition block computes whether there is another loop iteration.
BasicBlock * getExit() const
Reaching the exit indicates no more iterations are being executed.
LLVM_ABI BasicBlock * getPreheader() const
The preheader ensures that there is only a single edge entering the loop.
Instruction * getIndVar() const
Returns the instruction representing the current logical induction variable.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true, bool ByteString=false)
This method constructs a CDS and initializes it with a text string.
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
bool properlyDominates(const DomTreeNodeBase< NodeT > *A, const DomTreeNodeBase< NodeT > *B) const
properlyDominates - Returns true iff A dominates B and A != B.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
reference get()
Returns a reference to the stored T value.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this GlobalObject has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
bool isSet() const
Returns true if this insert point is set.
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
InsertPoint saveIP() const
Returns the current insert point.
void restoreIP(InsertPoint IP)
Sets the current insert point to a previously-saved location.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
LLVM_ABI void addOperand(MDNode *M)
Device global variable entries info.
Target region entries info.
Base class of the entries info.
Class that manages information about offload code regions and data.
function_ref< void(StringRef, const OffloadEntryInfoDeviceGlobalVar &)> OffloadDeviceGlobalVarEntryInfoActTy
Applies action Action on all registered entries.
OMPTargetDeviceClauseKind
Kind of device clause for declare target variables and functions NOTE: Currently not used as a part o...
@ OMPTargetDeviceClauseAny
The target is marked for all devices.
LLVM_ABI void registerDeviceGlobalVarEntryInfo(StringRef VarName, Constant *Addr, int64_t VarSize, OMPTargetGlobalVarEntryKind Flags, GlobalValue::LinkageTypes Linkage)
Register device global variable entry.
LLVM_ABI void initializeDeviceGlobalVarEntryInfo(StringRef Name, OMPTargetGlobalVarEntryKind Flags, unsigned Order)
Initialize device global variable entry.
LLVM_ABI void actOnDeviceGlobalVarEntriesInfo(const OffloadDeviceGlobalVarEntryInfoActTy &Action)
OMPTargetRegionEntryKind
Kind of the target registry entry.
@ OMPTargetRegionEntryTargetRegion
Mark the entry as target region.
LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, const TargetRegionEntryInfo &EntryInfo)
LLVM_ABI bool hasTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, bool IgnoreAddressId=false) const
Return true if a target region entry with the provided information exists.
LLVM_ABI void registerTargetRegionEntryInfo(TargetRegionEntryInfo EntryInfo, Constant *Addr, Constant *ID, OMPTargetRegionEntryKind Flags)
Register target region entry.
LLVM_ABI void actOnTargetRegionEntriesInfo(const OffloadTargetRegionEntryInfoActTy &Action)
LLVM_ABI void initializeTargetRegionEntryInfo(const TargetRegionEntryInfo &EntryInfo, unsigned Order)
Initialize target region entry.
OMPTargetGlobalVarEntryKind
Kind of the global variable entry.
@ OMPTargetGlobalVarEntryEnter
Mark the entry as a declare target enter.
@ OMPTargetGlobalRegisterRequires
Mark the entry as a register requires global.
@ OMPTargetGlobalVarEntryIndirect
Mark the entry as a declare target indirect global.
@ OMPTargetGlobalVarEntryLink
Mark the entry as a to declare target link.
@ OMPTargetGlobalVarEntryTo
Mark the entry as a to declare target.
@ OMPTargetGlobalVarEntryIndirectVTable
Mark the entry as a declare target indirect vtable.
function_ref< void(const TargetRegionEntryInfo &EntryInfo, const OffloadEntryInfoTargetRegion &)> OffloadTargetRegionEntryInfoActTy
Applies action Action on all registered entries.
bool hasDeviceGlobalVarEntryInfo(StringRef VarName) const
Checks if the variable with the given name has been registered already.
LLVM_ABI bool empty() const
Return true if there are no entries defined.
std::optional< bool > IsTargetDevice
Flag to define whether to generate code for the role of the OpenMP host (if set to false) or device (...
std::optional< bool > IsGPU
Flag for specifying if the compilation is done for an accelerator.
LLVM_ABI int64_t getRequiresFlags() const
Returns requires directive clauses as flags compatible with those expected by libomptarget.
std::optional< bool > OpenMPOffloadMandatory
Flag for specifying if offloading is mandatory.
LLVM_ABI void setHasRequiresReverseOffload(bool Value)
LLVM_ABI OpenMPIRBuilderConfig()
LLVM_ABI bool hasRequiresUnifiedSharedMemory() const
LLVM_ABI void setHasRequiresUnifiedSharedMemory(bool Value)
unsigned getDefaultTargetAS() const
LLVM_ABI bool hasRequiresDynamicAllocators() const
LLVM_ABI void setHasRequiresUnifiedAddress(bool Value)
bool isTargetDevice() const
LLVM_ABI void setHasRequiresDynamicAllocators(bool Value)
LLVM_ABI bool hasRequiresReverseOffload() const
bool hasRequiresFlags() const
LLVM_ABI bool hasRequiresUnifiedAddress() const
Struct that keeps the information that should be kept throughout a 'target data' region.
An interface to create LLVM-IR for OpenMP directives.
LLVM_ABI InsertPointOrErrorTy createOrderedThreadsSimd(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsThreads)
Generator for 'omp ordered [threads | simd]'.
LLVM_ABI void emitAArch64DeclareSimdFunction(llvm::Function *Fn, unsigned VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch, char ISA, unsigned NarrowestDataSize, bool OutputBecomesInput)
Emit AArch64 vector-function ABI attributes for a declare simd function.
LLVM_ABI Constant * getOrCreateIdent(Constant *SrcLocStr, uint32_t SrcLocStrSize, omp::IdentFlag Flags=omp::IdentFlag(0), unsigned Reserve2Flags=0)
Return an ident_t* encoding the source location SrcLocStr and Flags.
LLVM_ABI FunctionCallee getOrCreateRuntimeFunction(Module &M, omp::RuntimeFunction FnID)
Return the function declaration for the runtime function with FnID.
LLVM_ABI InsertPointOrErrorTy createCancel(const LocationDescription &Loc, Value *IfCondition, omp::Directive CanceledDirective)
Generator for 'omp cancel'.
std::function< Expected< Function * >(StringRef FunctionName)> FunctionGenCallback
Functions used to generate a function with the given name.
LLVM_ABI CallInst * createOMPAllocShared(const LocationDescription &Loc, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_alloc_shared.
ReductionGenCBKind
Enum class for the ReductionGen CallBack type to be used.
LLVM_ABI CanonicalLoopInfo * collapseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, InsertPointTy ComputeIP)
Collapse a loop nest into a single loop.
LLVM_ABI void createTaskyield(const LocationDescription &Loc)
Generator for 'omp taskyield'.
std::function< Error(InsertPointTy CodeGenIP)> FinalizeCallbackTy
Callback type for variable finalization (think destructors).
LLVM_ABI void emitBranch(BasicBlock *Target)
LLVM_ABI Error emitCancelationCheckImpl(Value *CancelFlag, omp::Directive CanceledDirective)
Generate control flow and cleanup for cancellation.
static LLVM_ABI void writeThreadBoundsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI void emitTaskwaitImpl(const LocationDescription &Loc)
Generate a taskwait runtime call.
LLVM_ABI Constant * registerTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, Function *OutlinedFunction, StringRef EntryFnName, StringRef EntryFnIDName)
Registers the given function and sets up the attributes of the function. Returns the FunctionID.
LLVM_ABI GlobalVariable * emitKernelExecutionMode(StringRef KernelName, omp::OMPTgtExecModeFlags Mode)
Emit the kernel execution mode.
LLVM_ABI void initialize()
Initialize the internal state, this will put structures types and potentially other helpers into the ...
LLVM_ABI void createTargetDeinit(const LocationDescription &Loc, int32_t TeamsReductionDataSize=0, int32_t TeamsReductionBufferLength=1024)
Create a runtime call for kmpc_target_deinit.
LLVM_ABI InsertPointTy createAtomicWrite(const LocationDescription &Loc, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic write for : X = Expr — Only Scalar data types.
LLVM_ABI void loadOffloadInfoMetadata(Module &M)
Loads all the offload entries information from the host IR metadata.
function_ref< MapInfosTy &(InsertPointTy CodeGenIP)> GenMapInfoCallbackTy
Callback type for creating the map infos for the kernel parameters.
LLVM_ABI Error emitOffloadingArrays(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Emit the arrays used to pass the captures and map information to the offloading runtime library.
LLVM_ABI void unrollLoopFull(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully unroll a loop.
function_ref< Error(InsertPointTy CodeGenIP, Value *IndVar)> LoopBodyGenCallbackTy
Callback type for loop body code generation.
LLVM_ABI InsertPointOrErrorTy emitScanReduction(const LocationDescription &Loc, ArrayRef< llvm::OpenMPIRBuilder::ReductionInfo > ReductionInfos, ScanInfo *ScanRedInfo)
This function performs the scan reduction of the values updated in the input phase.
LLVM_ABI void emitFlush(const LocationDescription &Loc)
Generate a flush runtime call.
LLVM_ABI InsertPointOrErrorTy createScope(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait)
Generator for 'omp scope'.
static LLVM_ABI std::pair< int32_t, int32_t > readThreadBoundsForKernel(const Triple &T, Function &Kernel)
}
OpenMPIRBuilderConfig Config
The OpenMPIRBuilder Configuration.
LLVM_ABI CallInst * createOMPInteropDestroy(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_destroy.
LLVM_ABI void emitUsed(StringRef Name, ArrayRef< llvm::WeakTrackingVH > List)
Emit the llvm.used metadata.
LLVM_ABI InsertPointOrErrorTy createSingle(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool IsNowait, ArrayRef< llvm::Value * > CPVars={}, ArrayRef< llvm::Function * > CPFuncs={})
Generator for 'omp single'.
LLVM_ABI InsertPointOrErrorTy createTeams(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, Value *NumTeamsLower=nullptr, Value *NumTeamsUpper=nullptr, Value *ThreadLimit=nullptr, Value *IfExpr=nullptr)
Generator for #omp teams
std::forward_list< CanonicalLoopInfo > LoopInfos
Collection of owned canonical loop objects that eventually need to be free'd.
LLVM_ABI void createTaskwait(const LocationDescription &Loc)
Generator for 'omp taskwait'.
LLVM_ABI llvm::StructType * getKmpTaskAffinityInfoTy()
Return the LLVM struct type matching runtime kmp_task_affinity_info_t.
LLVM_ABI CanonicalLoopInfo * createLoopSkeleton(DebugLoc DL, Value *TripCount, Function *F, BasicBlock *PreInsertBefore, BasicBlock *PostInsertBefore, const Twine &Name={})
Create the control flow structure of a canonical OpenMP loop.
LLVM_ABI std::string createPlatformSpecificName(ArrayRef< StringRef > Parts) const
Create a name using the platform-specific separators.
LLVM_ABI FunctionCallee createDispatchNextFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_next_* runtime function for the specified size IVSize and sign IVSigned.
static LLVM_ABI void getKernelArgsVector(TargetKernelArgs &KernelArgs, IRBuilderBase &Builder, SmallVector< Value * > &ArgsVector)
Create the kernel args vector used by emitTargetKernel.
LLVM_ABI InsertPointOrErrorTy createTarget(const LocationDescription &Loc, bool IsOffloadEntry, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, TargetDataInfo &Info, TargetRegionEntryInfo &EntryInfo, const TargetKernelDefaultAttrs &DefaultAttrs, const TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, SmallVectorImpl< Value * > &Inputs, GenMapInfoCallbackTy GenMapInfoCB, TargetBodyGenCallbackTy BodyGenCB, TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB, CustomMapperCallbackTy CustomMapperCB, const DependenciesInfo &Dependencies={}, bool HasNowait=false, Value *DynCGroupMem=nullptr, omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback=omp::OMPDynGroupprivateFallbackType::Abort)
Generator for 'omp target'.
LLVM_ABI void unrollLoopHeuristic(DebugLoc DL, CanonicalLoopInfo *Loop)
Fully or partially unroll a loop.
LLVM_ABI omp::OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position)
Get OMP_MAP_MEMBER_OF flag with extra bits reserved based on the position given.
LLVM_ABI void addAttributes(omp::RuntimeFunction FnID, Function &Fn)
Add attributes known for FnID to Fn.
Module & M
The underlying LLVM-IR module.
StringMap< Constant * > SrcLocStrMap
Map to remember source location strings.
LLVM_ABI void createMapperAllocas(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumOperands, struct MapperAllocas &MapperAllocas)
Create the allocas instruction used in call to mapper functions.
LLVM_ABI Constant * getOrCreateSrcLocStr(StringRef LocStr, uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the source location LocStr.
LLVM_ABI Error emitTargetRegionFunction(TargetRegionEntryInfo &EntryInfo, FunctionGenCallback &GenerateFunctionCallback, bool IsOffloadEntry, Function *&OutlinedFn, Constant *&OutlinedFnID)
Create a unique name for the entry function using the source location information of the current targ...
LLVM_ABI InsertPointOrErrorTy createIteratorLoop(LocationDescription Loc, llvm::Value *TripCount, IteratorBodyGenTy BodyGen, llvm::StringRef Name="iterator")
Create a canonical iterator loop at the current insertion point.
LLVM_ABI Expected< SmallVector< llvm::CanonicalLoopInfo * > > createCanonicalScanLoops(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, InsertPointTy ComputeIP, const Twine &Name, ScanInfo *ScanRedInfo)
Generator for the control flow structure of an OpenMP canonical loops if the parent directive has an ...
LLVM_ABI FunctionCallee createDispatchFiniFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_fini_* runtime function for the specified size IVSize and sign IVSigned.
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> TargetBodyGenCallbackTy
LLVM_ABI void unrollLoopPartial(DebugLoc DL, CanonicalLoopInfo *Loop, int32_t Factor, CanonicalLoopInfo **UnrolledCLI)
Partially unroll a loop.
function_ref< Error(Value *DeviceID, Value *RTLoc, IRBuilderBase::InsertPoint TargetTaskAllocaIP)> TargetTaskBodyCallbackTy
Callback type for generating the bodies of device directives that require outer target tasks (e....
Expected< MapInfosTy & > MapInfosOrErrorTy
LLVM_ABI void emitTaskyieldImpl(const LocationDescription &Loc)
Generate a taskyield runtime call.
LLVM_ABI void emitMapperCall(const LocationDescription &Loc, Function *MapperFunc, Value *SrcLocInfo, Value *MaptypesArg, Value *MapnamesArg, struct MapperAllocas &MapperAllocas, int64_t DeviceID, unsigned NumOperands)
Create the call for the target mapper function.
LLVM_ABI InsertPointOrErrorTy createDistribute(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for #omp distribute
LLVM_ABI InsertPointOrErrorTy createTask(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, bool Tied=true, Value *Final=nullptr, Value *IfCondition=nullptr, const DependenciesInfo &Dependencies={}, const AffinityData &Affinities={}, bool Mergeable=false, Value *EventHandle=nullptr, Value *Priority=nullptr)
Generator for #omp task
function_ref< Expected< Function * >(unsigned int)> CustomMapperCallbackTy
LLVM_ABI InsertPointTy createAtomicCompare(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOpValue &R, Value *E, Value *D, AtomicOrdering AO, omp::OMPAtomicCompareOp Op, bool IsXBinopExpr, bool IsPostfixUpdate, bool IsFailOnly)
Emit atomic compare for constructs: — Only scalar data types cond-expr-stmt: x = x ordop expr ?
LLVM_ABI InsertPointTy createOrderedDepend(const LocationDescription &Loc, InsertPointTy AllocaIP, unsigned NumLoops, ArrayRef< llvm::Value * > StoreValues, const Twine &Name, bool IsDependSource)
Generator for 'omp ordered depend (source | sink)'.
LLVM_ABI InsertPointTy createCopyinClauseBlocks(InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr, llvm::IntegerType *IntPtrTy, bool BranchtoEnd=true)
Generate conditional branch and relevant BasicBlocks through which private threads copy the 'copyin' ...
function_ref< InsertPointOrErrorTy( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value &Original, Value &Inner, Value *&ReplVal)> PrivatizeCallbackTy
Callback type for variable privatization (think copy & default constructor).
LLVM_ABI bool isFinalized()
Check whether the finalize function has already run.
SmallVector< FinalizationInfo, 8 > FinalizationStack
The finalization stack made up of finalize callbacks currently in-flight, wrapped into FinalizationIn...
LLVM_ABI std::vector< CanonicalLoopInfo * > tileLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops, ArrayRef< Value * > TileSizes)
Tile a loop nest.
LLVM_ABI CallInst * createOMPInteropInit(const LocationDescription &Loc, Value *InteropVar, omp::OMPInteropType InteropType, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_init.
LLVM_ABI Error emitIfClause(Value *Cond, BodyGenCallbackTy ThenGen, BodyGenCallbackTy ElseGen, InsertPointTy AllocaIP={}, ArrayRef< BasicBlock * > DeallocBlocks={})
Emits code for OpenMP 'if' clause using specified BodyGenCallbackTy Here is the logic: if (Cond) { Th...
LLVM_ABI void finalize(Function *Fn=nullptr)
Finalize the underlying module, e.g., by outlining regions.
LLVM_ABI Function * getOrCreateRuntimeFunctionPtr(omp::RuntimeFunction FnID)
void addOutlineInfo(std::unique_ptr< OutlineInfo > &&OI)
Add a new region that will be outlined later.
LLVM_ABI InsertPointTy createTargetInit(const LocationDescription &Loc, const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs)
The omp target interface.
LLVM_ABI InsertPointOrErrorTy createReductions(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false)
Generator for 'omp reduction'.
const Triple T
The target triple of the underlying module.
DenseMap< std::pair< Constant *, uint64_t >, Constant * > IdentMap
Map to remember existing ident_t*.
LLVM_ABI CallInst * createOMPFree(const LocationDescription &Loc, Value *Addr, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_free.
LLVM_ABI FunctionCallee createForStaticInitFunction(unsigned IVSize, bool IVSigned, bool IsGPUDistribute)
Returns __kmpc_for_static_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI CallInst * createOMPAlloc(const LocationDescription &Loc, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_alloc.
LLVM_ABI void emitNonContiguousDescriptor(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo, TargetDataInfo &Info)
Emit an array of struct descriptors to be assigned to the offload args.
LLVM_ABI InsertPointOrErrorTy createSection(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp section'.
LLVM_ABI InsertPointOrErrorTy createTaskgroup(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB)
Generator for the taskgroup construct.
LLVM_ABI InsertPointOrErrorTy createParallel(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< BasicBlock * > DeallocBlocks, BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads, omp::ProcBindKind ProcBind, bool IsCancellable)
Generator for 'omp parallel'.
function_ref< InsertPointOrErrorTy(InsertPointTy)> EmitFallbackCallbackTy
Callback function type for functions emitting the host fallback code that is executed when the kernel...
static LLVM_ABI TargetRegionEntryInfo getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack, vfs::FileSystem &VFS, StringRef ParentName="")
Creates a unique info for a target entry when provided a filename and line number from.
LLVM_ABI void emitTaskDependency(IRBuilderBase &Builder, Value *Entry, const DependData &Dep)
Store one kmp_depend_info entry at the given Entry pointer.
LLVM_ABI void emitBlock(BasicBlock *BB, Function *CurFn, bool IsFinished=false)
LLVM_ABI Value * getOrCreateThreadID(Value *Ident)
Return the current thread ID.
LLVM_ABI InsertPointOrErrorTy createMaster(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB)
Generator for 'omp master'.
LLVM_ABI InsertPointOrErrorTy createTargetData(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks, Value *DeviceID, Value *IfCond, TargetDataInfo &Info, GenMapInfoCallbackTy GenMapInfoCB, CustomMapperCallbackTy CustomMapperCB, omp::RuntimeFunction *MapperFunc=nullptr, function_ref< InsertPointOrErrorTy(InsertPointTy CodeGenIP, BodyGenTy BodyGenType)> BodyGenCB=nullptr, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr, Value *SrcLocInfo=nullptr)
Generator for 'omp target data'.
CallInst * createRuntimeFunctionCall(FunctionCallee Callee, ArrayRef< Value * > Args, StringRef Name="")
LLVM_ABI InsertPointOrErrorTy emitKernelLaunch(const LocationDescription &Loc, Value *OutlinedFnID, EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args, Value *DeviceID, Value *RTLoc, InsertPointTy AllocaIP)
Generate a target region entry call and host fallback call.
StringMap< GlobalVariable *, BumpPtrAllocator > InternalVars
An ordered map of auto-generated variables to their unique names.
LLVM_ABI InsertPointOrErrorTy createCancellationPoint(const LocationDescription &Loc, omp::Directive CanceledDirective)
Generator for 'omp cancellation point'.
LLVM_ABI CallInst * createOMPAlignedAlloc(const LocationDescription &Loc, Value *Align, Value *Size, Value *Allocator, std::string Name="")
Create a runtime call for kmpc_align_alloc.
LLVM_ABI FunctionCallee createDispatchInitFunction(unsigned IVSize, bool IVSigned)
Returns __kmpc_dispatch_init_* runtime function for the specified size IVSize and sign IVSigned.
LLVM_ABI InsertPointOrErrorTy createScan(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< llvm::Value * > ScanVars, ArrayRef< llvm::Type * > ScanVarsType, bool IsInclusive, ScanInfo *ScanRedInfo)
This directive split and directs the control flow to input phase blocks or scan phase blocks based on...
LLVM_ABI CallInst * createOMPFreeShared(const LocationDescription &Loc, Value *Addr, Value *Size, const Twine &Name=Twine(""))
Create a runtime call for kmpc_free_shared.
LLVM_ABI CallInst * createOMPInteropUse(const LocationDescription &Loc, Value *InteropVar, Value *Device, Value *NumDependences, Value *DependenceAddress, bool HaveNowaitClause)
Create a runtime call for __tgt_interop_use.
IRBuilder<>::InsertPoint InsertPointTy
Type used throughout for insertion points.
LLVM_ABI GlobalVariable * getOrCreateInternalVariable(Type *Ty, const StringRef &Name, std::optional< unsigned > AddressSpace={})
Gets (if variable with the given name already exist) or creates internal global variable with the spe...
LLVM_ABI GlobalVariable * createOffloadMapnames(SmallVectorImpl< llvm::Constant * > &Names, std::string VarName)
Create the global variable holding the offload names information.
std::forward_list< ScanInfo > ScanInfos
Collection of owned ScanInfo objects that eventually need to be free'd.
static LLVM_ABI void writeTeamsForKernel(const Triple &T, Function &Kernel, int32_t LB, int32_t UB)
LLVM_ABI Value * calculateCanonicalLoopTripCount(const LocationDescription &Loc, Value *Start, Value *Stop, Value *Step, bool IsSigned, bool InclusiveStop, const Twine &Name="loop")
Calculate the trip count of a canonical loop.
LLVM_ABI InsertPointOrErrorTy createBarrier(const LocationDescription &Loc, omp::Directive Kind, bool ForceSimpleCall=false, bool CheckCancelFlag=true)
Emitter methods for OpenMP directives.
LLVM_ABI void setCorrectMemberOfFlag(omp::OpenMPOffloadMappingFlags &Flags, omp::OpenMPOffloadMappingFlags MemberOfFlag)
Given an initial flag set, this function modifies it to contain the passed in MemberOfFlag generated ...
LLVM_ABI Error emitOffloadingArraysAndArgs(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &Info, TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo, CustomMapperCallbackTy CustomMapperCB, bool IsNonContiguous=false, bool ForEndCall=false, function_ref< void(unsigned int, Value *)> DeviceAddrCB=nullptr)
Allocates memory for and populates the arrays required for offloading (offload_{baseptrs|ptrs|mappers...
LLVM_ABI Constant * getOrCreateDefaultSrcLocStr(uint32_t &SrcLocStrSize)
Return the (LLVM-IR) string describing the default source location.
LLVM_ABI InsertPointOrErrorTy createCritical(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst)
Generator for 'omp critical'.
LLVM_ABI void createOffloadEntry(Constant *ID, Constant *Addr, uint64_t Size, int32_t Flags, GlobalValue::LinkageTypes, StringRef Name="")
Creates offloading entry for the provided entry ID ID, address Addr, size Size, and flags Flags.
static LLVM_ABI unsigned getOpenMPDefaultSimdAlign(const Triple &TargetTriple, const StringMap< bool > &Features)
Get the default alignment value for given target.
LLVM_ABI unsigned getFlagMemberOffset()
Get the offset of the OMP_MAP_MEMBER_OF field.
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP, bool NeedsBarrier, llvm::omp::ScheduleKind SchedKind=llvm::omp::OMP_SCHEDULE_Default, Value *ChunkSize=nullptr, bool HasSimdModifier=false, bool HasMonotonicModifier=false, bool HasNonmonotonicModifier=false, bool HasOrderedClause=false, omp::WorksharingLoopType LoopType=omp::WorksharingLoopType::ForStaticLoop, bool NoLoop=false, bool HasDistSchedule=false, Value *DistScheduleChunkSize=nullptr)
Modifies the canonical loop to be a workshare loop.
LLVM_ABI InsertPointOrErrorTy createAtomicCapture(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, AtomicOpValue &V, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool UpdateExpr, bool IsPostfixUpdate, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: — Only Scalar data types V = X; X = X BinOp Expr ,...
LLVM_ABI void createOffloadEntriesAndInfoMetadata(EmitMetadataErrorReportFunctionTy &ErrorReportFunction)
LLVM_ABI void applySimd(CanonicalLoopInfo *Loop, MapVector< Value *, Value * > AlignedVars, Value *IfCond, omp::OrderKind Order, ConstantInt *Simdlen, ConstantInt *Safelen)
Add metadata to simd-ize a loop.
SmallVector< std::unique_ptr< OutlineInfo >, 16 > OutlineInfos
Collection of regions that need to be outlined during finalization.
LLVM_ABI InsertPointOrErrorTy createAtomicUpdate(const LocationDescription &Loc, InsertPointTy AllocaIP, AtomicOpValue &X, Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp, bool IsXBinopExpr, bool IsIgnoreDenormalMode=false, bool IsFineGrainedMemory=false, bool IsRemoteMemory=false)
Emit atomic update for constructs: X = X BinOp Expr ,or X = Expr BinOp X For complex Operations: X = ...
std::function< std::tuple< std::string, uint64_t >()> FileIdentifierInfoCallbackTy
bool isLastFinalizationInfoCancellable(omp::Directive DK)
Return true if the last entry in the finalization stack is of kind DK and cancellable.
LLVM_ABI InsertPointTy emitTargetKernel(const LocationDescription &Loc, InsertPointTy AllocaIP, Value *&Return, Value *Ident, Value *DeviceID, Value *NumTeams, Value *NumThreads, Value *HostPtr, ArrayRef< Value * > KernelArgs)
Generate a target region entry call.
LLVM_ABI GlobalVariable * createOffloadMaptypes(SmallVectorImpl< uint64_t > &Mappings, std::string VarName)
Create the global variable holding the offload mappings information.
LLVM_ABI ~OpenMPIRBuilder()
LLVM_ABI CallInst * createCachedThreadPrivate(const LocationDescription &Loc, llvm::Value *Pointer, llvm::ConstantInt *Size, const llvm::Twine &Name=Twine(""))
Create a runtime call for kmpc_threadprivate_cached.
IRBuilder Builder
The LLVM-IR Builder used to create IR.
LLVM_ABI GlobalValue * createGlobalFlag(unsigned Value, StringRef Name)
Create a hidden global flag Name in the module with initial value Value.
LLVM_ABI void emitOffloadingArraysArgument(IRBuilderBase &Builder, OpenMPIRBuilder::TargetDataRTArgs &RTArgs, OpenMPIRBuilder::TargetDataInfo &Info, bool ForEndCall=false)
Emit the arguments to be passed to the runtime library based on the arrays of base pointers,...
LLVM_ABI InsertPointOrErrorTy createMasked(const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, Value *Filter)
Generator for 'omp masked'.
LLVM_ABI Expected< CanonicalLoopInfo * > createCanonicalLoop(const LocationDescription &Loc, LoopBodyGenCallbackTy BodyGenCB, Value *TripCount, const Twine &Name="loop")
Generator for the control flow structure of an OpenMP canonical loop.
function_ref< Expected< InsertPointTy >( InsertPointTy AllocaIP, InsertPointTy CodeGenIP, Value *DestPtr, Value *SrcPtr)> TaskDupCallbackTy
Callback type for task duplication function code generation.
LLVM_ABI Value * getSizeInBytes(Value *BasePtr)
Computes the size of type in bytes.
llvm::function_ref< llvm::Error( InsertPointTy BodyIP, llvm::Value *LinearIV)> IteratorBodyGenTy
LLVM_ABI InsertPointOrErrorTy createReductionsGPU(const LocationDescription &Loc, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< ReductionInfo > ReductionInfos, ArrayRef< bool > IsByRef, bool IsNoWait=false, bool IsTeamsReduction=false, ReductionGenCBKind ReductionGenCBKind=ReductionGenCBKind::MLIR, std::optional< omp::GV > GridValue={}, unsigned ReductionBufNum=1024, Value *SrcLocInfo=nullptr)
Design of OpenMP reductions on the GPU.
LLVM_ABI Expected< Function * > emitUserDefinedMapper(function_ref< MapInfosOrErrorTy(InsertPointTy CodeGenIP, llvm::Value *PtrPHI, llvm::Value *BeginArg)> PrivAndGenMapInfoCB, llvm::Type *ElemTy, StringRef FuncName, CustomMapperCallbackTy CustomMapperCB)
Emit the user-defined mapper function.
LLVM_ABI FunctionCallee createDispatchDeinitFunction()
Returns __kmpc_dispatch_deinit runtime function.
LLVM_ABI void registerTargetGlobalVariable(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage, Type *LlvmPtrTy, Constant *Addr)
Registers a target variable for device or host.
BodyGenTy
Type of BodyGen to use for region codegen.
LLVM_ABI CanonicalLoopInfo * fuseLoops(DebugLoc DL, ArrayRef< CanonicalLoopInfo * > Loops)
Fuse a sequence of loops.
LLVM_ABI void emitX86DeclareSimdFunction(llvm::Function *Fn, unsigned NumElements, const llvm::APSInt &VLENVal, llvm::ArrayRef< DeclareSimdAttrTy > ParamAttrs, DeclareSimdBranch Branch)
Emit x86 vector-function ABI attributes for a declare simd function.
SmallVector< llvm::Function *, 16 > ConstantAllocaRaiseCandidates
A collection of candidate target functions that's constant allocas will attempt to be raised on a cal...
OffloadEntriesInfoManager OffloadInfoManager
Info manager to keep track of target regions.
static LLVM_ABI std::pair< int32_t, int32_t > readTeamBoundsForKernel(const Triple &T, Function &Kernel)
Read/write the bounds on teams for Kernel.
const std::string ompOffloadInfoName
OMP Offload Info Metadata name string.
Expected< InsertPointTy > InsertPointOrErrorTy
Type used to represent an insertion point or an error value.
LLVM_ABI InsertPointTy createCopyPrivate(const LocationDescription &Loc, llvm::Value *BufSize, llvm::Value *CpyBuf, llvm::Value *CpyFn, llvm::Value *DidIt)
Generator for __kmpc_copyprivate.
LLVM_ABI InsertPointOrErrorTy createSections(const LocationDescription &Loc, InsertPointTy AllocaIP, ArrayRef< StorableBodyGenCallbackTy > SectionCBs, PrivatizeCallbackTy PrivCB, FinalizeCallbackTy FiniCB, bool IsCancellable, bool IsNowait)
Generator for 'omp sections'.
std::function< void(EmitMetadataErrorKind, TargetRegionEntryInfo)> EmitMetadataErrorReportFunctionTy
Callback function type.
function_ref< InsertPointOrErrorTy( Argument &Arg, Value *Input, Value *&RetVal, InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< InsertPointTy > DeallocIPs)> TargetGenArgAccessorsCallbackTy
LLVM_ABI Expected< ScanInfo * > scanInfoInitialize()
Creates a ScanInfo object, allocates and returns the pointer.
LLVM_ABI InsertPointOrErrorTy emitTargetTask(TargetTaskBodyCallbackTy TaskBodyCB, Value *DeviceID, Value *RTLoc, OpenMPIRBuilder::InsertPointTy AllocaIP, const DependenciesInfo &Dependencies, const TargetDataRTArgs &RTArgs, bool HasNoWait)
Generate a target-task for the target construct.
LLVM_ABI InsertPointTy createAtomicRead(const LocationDescription &Loc, AtomicOpValue &X, AtomicOpValue &V, AtomicOrdering AO, InsertPointTy AllocaIP)
Emit atomic Read for : V = X — Only Scalar data types.
function_ref< Error(InsertPointTy AllocaIP, InsertPointTy CodeGenIP, ArrayRef< BasicBlock * > DeallocBlocks)> BodyGenCallbackTy
Callback type for body (=inner region) code generation.
bool updateToLocation(const LocationDescription &Loc)
Update the internal location to Loc.
LLVM_ABI void createFlush(const LocationDescription &Loc)
Generator for 'omp flush'.
LLVM_ABI Constant * getAddrOfDeclareTargetVar(OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause, OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause, bool IsDeclaration, bool IsExternallyVisible, TargetRegionEntryInfo EntryInfo, StringRef MangledName, std::vector< GlobalVariable * > &GeneratedRefs, bool OpenMPSIMD, std::vector< Triple > TargetTriple, Type *LlvmPtrTy, std::function< Constant *()> GlobalInitializer, std::function< GlobalValue::LinkageTypes()> VariableLinkage)
Retrieve (or create if non-existent) the address of a declare target variable, used in conjunction wi...
EmitMetadataErrorKind
The kind of errors that can occur when emitting the offload entries and metadata.
@ EMIT_MD_DECLARE_TARGET_ERROR
@ EMIT_MD_GLOBAL_VAR_INDIRECT_ERROR
@ EMIT_MD_GLOBAL_VAR_LINK_ERROR
@ EMIT_MD_TARGET_REGION_ERROR
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
Class to represent pointers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
ScanInfo holds the information to assist in lowering of Scan reduction.
llvm::SmallDenseMap< llvm::Value *, llvm::Value * > * ScanBuffPtrs
Maps the private reduction variable to the pointer of the temporary buffer.
llvm::BasicBlock * OMPScanLoopExit
Exit block of loop body.
llvm::Value * IV
Keeps track of value of iteration variable for input/scan loop to be used for Scan directive lowering...
llvm::BasicBlock * OMPAfterScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanInit
Block before loop body where scan initializations are done.
llvm::BasicBlock * OMPBeforeScanBlock
Dominates the body of the loop before scan directive.
llvm::BasicBlock * OMPScanFinish
Block after loop body where scan finalizations are done.
llvm::Value * Span
Stores the span of canonical loop being lowered to be used for temporary buffer allocation or Finaliz...
bool OMPFirstScanLoop
If true, it indicates Input phase is lowered; else it indicates ScanPhase is lowered.
llvm::BasicBlock * OMPScanDispatch
Controls the flow to before or after scan blocks.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
bool empty() const
Determine if the SetVector is empty or not.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
bool test(unsigned Idx) const
bool all() const
Returns true if all bits are set.
bool any() const
Returns true if any bit is set.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Represent a constant reference to a string, i.e.
std::string str() const
Get the contents as an std::string.
constexpr bool empty() const
Check if the string is empty.
constexpr size_t size() const
Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
LLVM_ABI Type * getStructElementType(unsigned N) const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
Unconditional Branch instruction.
static UncondBrInst * Create(BasicBlock *Target, InsertPosition InsertBefore=nullptr)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll(OptimizationRemarkEmitter *ORE=nullptr, const Loop *L=nullptr) const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI bool replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr)
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
@ OMP_TGT_EXEC_MODE_SPMD_NO_LOOP
@ OMP_TGT_EXEC_MODE_GENERIC
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
LLVM_ABI BasicBlock * splitBBWithSuffix(IRBuilderBase &Builder, bool CreateBranch, llvm::Twine Suffix=".split")
Like splitBB, but reuses the current block's name for the new name.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
FunctionAddr VTableAddr Value
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
@ LLVM_MARK_AS_BITMASK_ENUM
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
LLVM_ABI std::error_code inconvertibleErrorCode()
The value returned by this function can be returned from convertToErrorCode for Error values where no...
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
LLVM_ABI BasicBlock * splitBB(IRBuilderBase::InsertPoint IP, bool CreateBranch, DebugLoc DL, llvm::Twine Name={})
Split a BasicBlock at an InsertPoint, even if the block is degenerate (missing the terminator).
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
auto dyn_cast_or_null(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionAddr VTableAddr uintptr_t uintptr_t Version
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
CodeGenOptLevel
Code generation optimization level.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Error make_error(ArgTs &&... Args)
Make a Error instance representing failure using the given error info type.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
@ Mul
Product of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
LLVM_ABI void spliceBB(IRBuilderBase::InsertPoint IP, BasicBlock *New, bool CreateBranch, DebugLoc DL)
Move the instruction after an InsertPoint to the beginning of another BasicBlock.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
LLVM_ABI void computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP)
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
A struct to pack the relevant information for an OpenMP affinity clause.
a struct to pack relevant information while generating atomic Ops
A struct to pack the relevant information for an OpenMP depend clause.
omp::RTLDependenceKindTy DepKind
A struct to pack static and dynamic dependency information for a task.
SmallVector< DependData > Deps
Error mergeFiniBB(IRBuilderBase &Builder, BasicBlock *ExistingFiniBB)
For cases where there is an unavoidable existing finalization block (e.g.
Expected< BasicBlock * > getFiniBB(IRBuilderBase &Builder)
The basic block to which control should be transferred to implement the FiniCB.
Description of a LLVM-IR insertion point (IP) and a debug/source location (filename,...
MapNonContiguousArrayTy Offsets
MapNonContiguousArrayTy Counts
MapNonContiguousArrayTy Strides
This structure contains combined information generated for mappable clauses, including base pointers,...
MapDeviceInfoArrayTy DevicePointers
MapValuesArrayTy BasePointers
MapValuesArrayTy Pointers
StructNonContiguousInfo NonContigInfo
Helper that contains information about regions we need to outline during finalization.
LLVM_ABI void collectBlocks(SmallPtrSetImpl< BasicBlock * > &BlockSet, SmallVectorImpl< BasicBlock * > &BlockVector)
Collect all blocks in between EntryBB and ExitBB in both the given vector and set.
BasicBlock * OuterAllocBB
virtual LLVM_ABI std::unique_ptr< CodeExtractor > createCodeExtractor(ArrayRef< BasicBlock * > Blocks, bool ArgsInZeroAddressSpace, Twine Suffix=Twine(""))
Create a CodeExtractor instance based on the information stored in this structure,...
Information about an OpenMP reduction.
EvalKind EvaluationKind
Reduction evaluation kind - scalar, complex or aggregate.
ReductionGenAtomicCBTy AtomicReductionGen
Callback for generating the atomic reduction body, may be null.
ReductionGenCBTy ReductionGen
Callback for generating the reduction body.
Value * Variable
Reduction variable of pointer type.
Value * PrivateVariable
Thread-private partial reduction variable.
ReductionGenClangCBTy ReductionGenClang
Clang callback for generating the reduction body.
Type * ElementType
Reduction element type, must match pointee type of variable.
ReductionGenDataPtrPtrCBTy DataPtrPtrGen
Container for the arguments used to pass data to the runtime library.
Value * SizesArray
The array of sizes passed to the runtime library.
Value * PointersArray
The array of section pointers passed to the runtime library.
Value * MappersArray
The array of user-defined mappers passed to the runtime library.
Value * MapTypesArrayEnd
The array of map types passed to the runtime library for the end of the region, or nullptr if there a...
Value * BasePointersArray
The array of base pointer passed to the runtime library.
Value * MapTypesArray
The array of map types passed to the runtime library for the beginning of the region or for the entir...
Value * MapNamesArray
The array of original declaration names of mapped pointers sent to the runtime library for debugging.
Data structure that contains the needed information to construct the kernel args vector.
ArrayRef< Value * > NumThreads
The number of threads.
TargetDataRTArgs RTArgs
Arguments passed to the runtime library.
Value * NumIterations
The number of iterations.
Value * DynCGroupMem
The size of the dynamic shared memory.
unsigned NumTargetItems
Number of arguments passed to the runtime library.
bool HasNoWait
True if the kernel has 'no wait' clause.
ArrayRef< Value * > NumTeams
The number of teams.
omp::OMPDynGroupprivateFallbackType DynCGroupMemFallback
The fallback mechanism for the shared memory.
Container to pass the default attributes with which a kernel must be launched, used to set kernel att...
omp::OMPTgtExecModeFlags ExecFlags
SmallVector< int32_t, 3 > MaxTeams
Container to pass LLVM IR runtime values or constants related to the number of teams and threads with...
Value * DeviceID
Device ID value used in the kernel launch.
SmallVector< Value *, 3 > MaxTeams
Value * MaxThreads
'parallel' construct 'num_threads' clause value, if present and it is an SPMD kernel.
Value * LoopTripCount
Total number of iterations of the SPMD or Generic-SPMD kernel or null if it is a generic kernel.
SmallVector< Value *, 3 > TargetThreadLimit
SmallVector< Value *, 3 > TeamsThreadLimit
Data structure to contain the information needed to uniquely identify a target entry.
static LLVM_ABI void getTargetRegionEntryFnName(SmallVectorImpl< char > &Name, StringRef ParentName, unsigned DeviceID, unsigned FileID, unsigned Line, unsigned Count)
static constexpr const char * KernelNamePrefix
The prefix used for kernel names.
static LLVM_ABI const Target * lookupTarget(const Triple &TheTriple, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...