66#define DEBUG_TYPE "openmp-ir-builder"
73 cl::desc(
"Use optimistic attributes describing "
74 "'as-if' properties of runtime calls."),
78 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
79 cl::desc(
"Factor for the unroll threshold to account for code "
80 "simplifications still taking place"),
91 if (!IP1.isSet() || !IP2.isSet())
93 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
98 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
99 case OMPScheduleType::UnorderedStaticChunked:
100 case OMPScheduleType::UnorderedStatic:
101 case OMPScheduleType::UnorderedDynamicChunked:
102 case OMPScheduleType::UnorderedGuidedChunked:
103 case OMPScheduleType::UnorderedRuntime:
104 case OMPScheduleType::UnorderedAuto:
105 case OMPScheduleType::UnorderedTrapezoidal:
106 case OMPScheduleType::UnorderedGreedy:
107 case OMPScheduleType::UnorderedBalanced:
108 case OMPScheduleType::UnorderedGuidedIterativeChunked:
109 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
110 case OMPScheduleType::UnorderedSteal:
111 case OMPScheduleType::UnorderedStaticBalancedChunked:
112 case OMPScheduleType::UnorderedGuidedSimd:
113 case OMPScheduleType::UnorderedRuntimeSimd:
114 case OMPScheduleType::OrderedStaticChunked:
115 case OMPScheduleType::OrderedStatic:
116 case OMPScheduleType::OrderedDynamicChunked:
117 case OMPScheduleType::OrderedGuidedChunked:
118 case OMPScheduleType::OrderedRuntime:
119 case OMPScheduleType::OrderedAuto:
120 case OMPScheduleType::OrderdTrapezoidal:
121 case OMPScheduleType::NomergeUnorderedStaticChunked:
122 case OMPScheduleType::NomergeUnorderedStatic:
123 case OMPScheduleType::NomergeUnorderedDynamicChunked:
124 case OMPScheduleType::NomergeUnorderedGuidedChunked:
125 case OMPScheduleType::NomergeUnorderedRuntime:
126 case OMPScheduleType::NomergeUnorderedAuto:
127 case OMPScheduleType::NomergeUnorderedTrapezoidal:
128 case OMPScheduleType::NomergeUnorderedGreedy:
129 case OMPScheduleType::NomergeUnorderedBalanced:
130 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
131 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
132 case OMPScheduleType::NomergeUnorderedSteal:
133 case OMPScheduleType::NomergeOrderedStaticChunked:
134 case OMPScheduleType::NomergeOrderedStatic:
135 case OMPScheduleType::NomergeOrderedDynamicChunked:
136 case OMPScheduleType::NomergeOrderedGuidedChunked:
137 case OMPScheduleType::NomergeOrderedRuntime:
138 case OMPScheduleType::NomergeOrderedAuto:
139 case OMPScheduleType::NomergeOrderedTrapezoidal:
140 case OMPScheduleType::OrderedDistributeChunked:
141 case OMPScheduleType::OrderedDistribute:
149 SchedType & OMPScheduleType::MonotonicityMask;
150 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
162 Builder.restoreIP(IP);
172 Kernel->getFnAttribute(
"target-features").getValueAsString();
173 if (Features.
count(
"+wavefrontsize64"))
188 bool HasSimdModifier,
bool HasDistScheduleChunks) {
190 switch (ClauseKind) {
191 case OMP_SCHEDULE_Default:
192 case OMP_SCHEDULE_Static:
193 return HasChunks ? OMPScheduleType::BaseStaticChunked
194 : OMPScheduleType::BaseStatic;
195 case OMP_SCHEDULE_Dynamic:
196 return OMPScheduleType::BaseDynamicChunked;
197 case OMP_SCHEDULE_Guided:
198 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
199 : OMPScheduleType::BaseGuidedChunked;
200 case OMP_SCHEDULE_Auto:
202 case OMP_SCHEDULE_Runtime:
203 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
204 : OMPScheduleType::BaseRuntime;
205 case OMP_SCHEDULE_Distribute:
206 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
207 : OMPScheduleType::BaseDistribute;
215 bool HasOrderedClause) {
216 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
217 OMPScheduleType::None &&
218 "Must not have ordering nor monotonicity flags already set");
221 ? OMPScheduleType::ModifierOrdered
222 : OMPScheduleType::ModifierUnordered;
223 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
226 if (OrderingScheduleType ==
227 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
228 return OMPScheduleType::OrderedGuidedChunked;
229 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
230 OMPScheduleType::ModifierOrdered))
231 return OMPScheduleType::OrderedRuntime;
233 return OrderingScheduleType;
239 bool HasSimdModifier,
bool HasMonotonic,
240 bool HasNonmonotonic,
bool HasOrderedClause) {
241 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
242 OMPScheduleType::None &&
243 "Must not have monotonicity flags already set");
244 assert((!HasMonotonic || !HasNonmonotonic) &&
245 "Monotonic and Nonmonotonic are contradicting each other");
248 return ScheduleType | OMPScheduleType::ModifierMonotonic;
249 }
else if (HasNonmonotonic) {
250 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
260 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
261 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
267 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
275 bool HasSimdModifier,
bool HasMonotonicModifier,
276 bool HasNonmonotonicModifier,
bool HasOrderedClause,
277 bool HasDistScheduleChunks) {
279 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
283 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
284 HasNonmonotonicModifier, HasOrderedClause);
299 assert(!Br->isConditional() &&
300 "BB's terminator must be an unconditional branch (or degenerate)");
303 Br->setSuccessor(0,
Target);
308 NewBr->setDebugLoc(
DL);
314 "Target BB must not have PHI nodes");
334 NewBr->setDebugLoc(
DL);
342 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
346 Builder.SetInsertPoint(Old);
350 Builder.SetCurrentDebugLocation(
DebugLoc);
359 spliceBB(IP, New, CreateBranch,
DL);
360 New->replaceSuccessorsPhiUsesWith(Old, New);
369 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
371 Builder.SetInsertPoint(Builder.GetInsertBlock());
374 Builder.SetCurrentDebugLocation(
DebugLoc);
383 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
385 Builder.SetInsertPoint(Builder.GetInsertBlock());
388 Builder.SetCurrentDebugLocation(
DebugLoc);
395 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
402 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
404 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
405 const Twine &Name =
"",
bool AsPtr =
true) {
406 Builder.restoreIP(OuterAllocaIP);
409 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
413 FakeVal = FakeValAddr;
416 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
421 Builder.restoreIP(InnerAllocaIP);
425 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
/// Bit flags describing which clauses of the OpenMP 'requires' directive were
/// seen in the translation unit. The combined value is stored in
/// OpenMPIRBuilderConfig::RequiresFlags and forwarded to the offload runtime.
enum OpenMPOffloadingRequiresDirFlags {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires directive present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
};
459OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
460 : RequiresFlags(OMP_REQ_UNDEFINED) {}
462OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
463 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
464 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
465 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
466 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
467 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
468 RequiresFlags(OMP_REQ_UNDEFINED) {
469 if (HasRequiresReverseOffload)
470 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
471 if (HasRequiresUnifiedAddress)
472 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
473 if (HasRequiresUnifiedSharedMemory)
474 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
475 if (HasRequiresDynamicAllocators)
476 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
479bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
480 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
483bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
484 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
487bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
488 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
491bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
492 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
495int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
496 return hasRequiresFlags() ? RequiresFlags
497 :
static_cast<int64_t
>(OMP_REQ_NONE);
500void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
502 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
504 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
514void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
516 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
518 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
521void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
523 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
525 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
532void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
536 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
538 constexpr size_t MaxDim = 3;
541 Value *HasNoWaitFlag = Builder.getInt64(KernelArgs.HasNoWait);
543 Value *DynCGroupMemFallbackFlag =
544 Builder.getInt64(
static_cast<uint64_t>(KernelArgs.DynCGroupMemFallback));
545 DynCGroupMemFallbackFlag = Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
546 Value *Flags = Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
548 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
551 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
552 Value *NumThreads3D =
553 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
555 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
557 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
559 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
561 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
563 ArgsVector = {Version,
565 KernelArgs.RTArgs.BasePointersArray,
566 KernelArgs.RTArgs.PointersArray,
567 KernelArgs.RTArgs.SizesArray,
568 KernelArgs.RTArgs.MapTypesArray,
569 KernelArgs.RTArgs.MapNamesArray,
570 KernelArgs.RTArgs.MappersArray,
571 KernelArgs.NumIterations,
575 KernelArgs.DynCGroupMem};
583 auto FnAttrs =
Attrs.getFnAttrs();
584 auto RetAttrs =
Attrs.getRetAttrs();
586 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
591 bool Param =
true) ->
void {
592 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
593 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
594 if (HasSignExt || HasZeroExt) {
595 assert(AS.getNumAttributes() == 1 &&
596 "Currently not handling extension attr combined with others.");
598 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
601 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
608#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
609#include "llvm/Frontend/OpenMP/OMPKinds.def"
613#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
615 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
616 addAttrSet(RetAttrs, RetAttrSet, false); \
617 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
618 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
619 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
621#include "llvm/Frontend/OpenMP/OMPKinds.def"
635#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
637 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
639 Fn = M.getFunction(Str); \
641#include "llvm/Frontend/OpenMP/OMPKinds.def"
647#define OMP_RTL(Enum, Str, ...) \
649 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
651#include "llvm/Frontend/OpenMP/OMPKinds.def"
655 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
665 LLVMContext::MD_callback,
667 2, {-1, -1},
true)}));
673 addAttributes(FnID, *Fn);
680 assert(Fn &&
"Failed to create OpenMP runtime function");
688 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
695 CallInst *
Call = Builder.CreateCall(Callee, Args, Name);
696 Call->setCallingConv(Config.getRuntimeCC());
700void OpenMPIRBuilder::initialize() { initializeTypes(M); }
711 for (
auto Inst =
Block->getReverseIterator()->begin();
712 Inst !=
Block->getReverseIterator()->end();) {
725void OpenMPIRBuilder::finalize(
Function *Fn) {
729 for (OutlineInfo &OI : OutlineInfos) {
732 if (Fn && OI.getFunction() != Fn) {
737 ParallelRegionBlockSet.
clear();
739 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
749 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
758 ".omp_par", ArgsInZeroAddressSpace);
762 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
763 assert(Extractor.isEligible() &&
764 "Expected OpenMP outlining to be possible!");
766 for (
auto *V : OI.ExcludeArgsFromAggregate)
767 Extractor.excludeArgFromAggregate(V);
769 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
773 if (TargetCpuAttr.isStringAttribute())
776 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
777 if (TargetFeaturesAttr.isStringAttribute())
778 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
781 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
783 "OpenMP outlined functions should not return a value!");
788 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
795 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
802 "Expected instructions to add in the outlined region entry");
804 End = ArtificialEntry.
rend();
809 if (
I.isTerminator()) {
811 if (OI.EntryBB->getTerminator())
812 OI.EntryBB->getTerminator()->adoptDbgRecords(
813 &ArtificialEntry,
I.getIterator(),
false);
817 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
820 OI.EntryBB->moveBefore(&ArtificialEntry);
827 if (OI.PostOutlineCB)
828 OI.PostOutlineCB(*OutlinedFn);
832 OutlineInfos = std::move(DeferredOutlines);
853 for (
Function *
F : ConstantAllocaRaiseCandidates)
856 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
857 [](EmitMetadataErrorKind Kind,
858 const TargetRegionEntryInfo &EntryInfo) ->
void {
859 errs() <<
"Error of kind: " << Kind
860 <<
" when emitting offload entries and metadata during "
861 "OMPIRBuilder finalization \n";
864 if (!OffloadInfoManager.empty())
865 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
867 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
868 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
869 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
870 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
876bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
878OpenMPIRBuilder::~OpenMPIRBuilder() {
879 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
887 ConstantInt::get(I32Ty,
Value), Name);
899 UsedArray.
resize(List.size());
900 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
904 if (UsedArray.
empty())
911 GV->setSection(
"llvm.metadata");
915OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
917 auto *Int8Ty = Builder.getInt8Ty();
920 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
928 unsigned Reserve2Flags) {
930 LocFlags |= OMP_IDENT_FLAG_KMPC;
933 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
937 ConstantInt::get(Int32,
uint32_t(LocFlags)),
938 ConstantInt::get(Int32, Reserve2Flags),
939 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
941 size_t SrcLocStrArgIdx = 4;
942 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
946 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
953 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
954 if (
GV.getInitializer() == Initializer)
959 M, OpenMPIRBuilder::Ident,
962 M.getDataLayout().getDefaultGlobalsAddressSpace());
974 SrcLocStrSize = LocStr.
size();
975 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
983 if (
GV.isConstant() &&
GV.hasInitializer() &&
984 GV.getInitializer() == Initializer)
987 SrcLocStr = Builder.CreateGlobalString(
988 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
996 unsigned Line,
unsigned Column,
1002 Buffer.
append(FunctionName);
1004 Buffer.
append(std::to_string(Line));
1006 Buffer.
append(std::to_string(Column));
1009 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
1013OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
1014 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1015 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1023 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1025 if (
DIFile *DIF = DIL->getFile())
1026 if (std::optional<StringRef> Source = DIF->getSource())
1031 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1032 DIL->getColumn(), SrcLocStrSize);
1035Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1037 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1038 Loc.IP.getBlock()->getParent());
1041Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1042 return createRuntimeFunctionCall(
1043 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1044 "omp_global_thread_num");
1047OpenMPIRBuilder::InsertPointOrErrorTy
1048OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1049 bool ForceSimpleCall,
bool CheckCancelFlag) {
1050 if (!updateToLocation(
Loc))
1059 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1062 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1065 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1068 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1071 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1076 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1078 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1079 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1084 bool UseCancelBarrier =
1085 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1088 getOrCreateRuntimeFunctionPtr(UseCancelBarrier
1089 ? OMPRTL___kmpc_cancel_barrier
1090 : OMPRTL___kmpc_barrier),
1093 if (UseCancelBarrier && CheckCancelFlag)
1094 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1097 return Builder.saveIP();
1100OpenMPIRBuilder::InsertPointOrErrorTy
1101OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1103 omp::Directive CanceledDirective) {
1104 if (!updateToLocation(
Loc))
1108 auto *UI = Builder.CreateUnreachable();
1113 Builder.SetInsertPoint(ThenTI);
1115 Value *CancelKind =
nullptr;
1116 switch (CanceledDirective) {
1117#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1118 case DirectiveEnum: \
1119 CancelKind = Builder.getInt32(Value); \
1121#include "llvm/Frontend/OpenMP/OMPKinds.def"
1127 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1128 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1129 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1131 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1132 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1133 if (CanceledDirective == OMPD_parallel) {
1135 Builder.restoreIP(IP);
1136 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1137 omp::Directive::OMPD_unknown,
1146 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1150 Builder.SetInsertPoint(UI->getParent());
1151 UI->eraseFromParent();
1153 return Builder.saveIP();
1156OpenMPIRBuilder::InsertPointOrErrorTy
1157OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1158 omp::Directive CanceledDirective) {
1159 if (!updateToLocation(
Loc))
1163 auto *UI = Builder.CreateUnreachable();
1164 Builder.SetInsertPoint(UI);
1166 Value *CancelKind =
nullptr;
1167 switch (CanceledDirective) {
1168#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1169 case DirectiveEnum: \
1170 CancelKind = Builder.getInt32(Value); \
1172#include "llvm/Frontend/OpenMP/OMPKinds.def"
1178 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1179 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1180 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1182 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1183 auto ExitCB = [
this, CanceledDirective,
Loc](InsertPointTy IP) ->
Error {
1184 if (CanceledDirective == OMPD_parallel) {
1186 Builder.restoreIP(IP);
1187 return createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
1188 omp::Directive::OMPD_unknown,
1197 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective, ExitCB))
1201 Builder.SetInsertPoint(UI->getParent());
1202 UI->eraseFromParent();
1204 return Builder.saveIP();
1207OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1208 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1211 if (!updateToLocation(
Loc))
1214 Builder.restoreIP(AllocaIP);
1215 auto *KernelArgsPtr =
1216 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1217 updateToLocation(
Loc);
1221 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1222 Builder.CreateAlignedStore(
1224 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1228 NumThreads, HostPtr, KernelArgsPtr};
1230 Return = createRuntimeFunctionCall(
1231 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1234 return Builder.saveIP();
1237OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1238 const LocationDescription &
Loc,
Value *OutlinedFnID,
1239 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1240 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1242 if (!updateToLocation(
Loc))
1255 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1259 Value *Return =
nullptr;
1263 getKernelArgsVector(Args, Builder, ArgsVector);
1278 Builder.restoreIP(emitTargetKernel(
1279 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1280 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1287 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1289 auto CurFn = Builder.GetInsertBlock()->getParent();
1290 emitBlock(OffloadFailedBlock, CurFn);
1291 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1293 return AfterIP.takeError();
1294 Builder.restoreIP(*AfterIP);
1295 emitBranch(OffloadContBlock);
1296 emitBlock(OffloadContBlock, CurFn,
true);
1297 return Builder.saveIP();
1300Error OpenMPIRBuilder::emitCancelationCheckImpl(
1301 Value *CancelFlag, omp::Directive CanceledDirective,
1302 FinalizeCallbackTy ExitCB) {
1303 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1304 "Unexpected cancellation!");
1309 if (Builder.GetInsertPoint() == BB->
end()) {
1315 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1317 Builder.SetInsertPoint(BB);
1323 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1324 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1329 Builder.SetInsertPoint(CancellationBlock);
1331 if (
Error Err = ExitCB(Builder.saveIP()))
1333 auto &FI = FinalizationStack.back();
1334 if (
Error Err = FI.FiniCB(Builder.saveIP()))
1338 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1357 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1360 "Expected at least tid and bounded tid as arguments");
1361 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1364 assert(CI &&
"Expected call instruction to outlined function");
1365 CI->
getParent()->setName(
"omp_parallel");
1367 Builder.SetInsertPoint(CI);
1368 Type *PtrTy = OMPIRBuilder->VoidPtr;
1372 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1376 Value *Args = ArgsAlloca;
1380 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1381 Builder.restoreIP(CurrentIP);
1384 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1386 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1388 Builder.CreateStore(V, StoreAddress);
1392 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1393 : Builder.getInt32(1);
1396 Value *Parallel51CallArgs[] = {
1400 NumThreads ? NumThreads : Builder.getInt32(-1),
1401 Builder.getInt32(-1),
1405 Builder.getInt64(NumCapturedVars)};
1408 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_51);
1410 OMPIRBuilder->createRuntimeFunctionCall(RTLFn, Parallel51CallArgs);
1413 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1416 Builder.SetInsertPoint(PrivTID);
1418 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1425 I->eraseFromParent();
1442 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1445 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1448 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1456 F->addMetadata(LLVMContext::MD_callback,
1465 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1468 "Expected at least tid and bounded tid as arguments");
1469 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1472 CI->
getParent()->setName(
"omp_parallel");
1473 Builder.SetInsertPoint(CI);
1476 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1480 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1482 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1489 auto PtrTy = OMPIRBuilder->VoidPtr;
1490 if (IfCondition && NumCapturedVars == 0) {
1495 OMPIRBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
1498 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1501 Builder.SetInsertPoint(PrivTID);
1503 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1510 I->eraseFromParent();
1514OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1515 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1516 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1517 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1518 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1521 if (!updateToLocation(
Loc))
1525 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1526 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1527 Value *ThreadID = getOrCreateThreadID(Ident);
1533 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1537 if (NumThreads && !Config.isTargetDevice()) {
1540 Builder.CreateIntCast(NumThreads, Int32,
false)};
1541 createRuntimeFunctionCall(
1542 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1545 if (ProcBind != OMP_PROC_BIND_default) {
1549 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1550 createRuntimeFunctionCall(
1551 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1554 BasicBlock *InsertBB = Builder.GetInsertBlock();
1559 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1567 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1568 Builder.restoreIP(NewOuter);
1569 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32,
nullptr,
"tid.addr");
1571 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1574 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1577 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1581 PointerType ::get(M.getContext(), 0),
1582 "zero.addr.ascast");
1603 auto FiniCBWrapper = [&](InsertPointTy IP) {
1608 Builder.restoreIP(IP);
1610 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1614 "Unexpected insertion point for finalization call!");
1618 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1623 InsertPointTy InnerAllocaIP = Builder.saveIP();
1626 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1627 Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr,
"tid");
1630 ToBeDeleted.
push_back(Builder.CreateLoad(Int32, TIDAddr,
"tid.addr.use"));
1632 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1650 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1653 assert(BodyGenCB &&
"Expected body generation callback!");
1654 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1655 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1658 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1661 if (Config.isTargetDevice()) {
1663 OI.PostOutlineCB = [=, ToBeDeletedVec =
1664 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1666 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1667 ThreadID, ToBeDeletedVec);
1671 OI.PostOutlineCB = [=, ToBeDeletedVec =
1672 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1674 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1678 OI.OuterAllocaBB = OuterAllocaBlock;
1679 OI.EntryBB = PRegEntryBB;
1680 OI.ExitBB = PRegExitBB;
1684 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1695 ".omp_par", ArgsInZeroAddressSpace);
1700 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1702 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1707 return GV->getValueType() == OpenMPIRBuilder::Ident;
1712 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1715 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1718 if (&V == TIDAddr || &V == ZeroAddr) {
1719 OI.ExcludeArgsFromAggregate.push_back(&V);
1724 for (
Use &U : V.uses())
1726 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1736 if (!V.getType()->isPointerTy()) {
1740 Builder.restoreIP(OuterAllocaIP);
1742 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1746 Builder.SetInsertPoint(InsertBB,
1748 Builder.CreateStore(&V, Ptr);
1751 Builder.restoreIP(InnerAllocaIP);
1752 Inner = Builder.CreateLoad(V.getType(), Ptr);
1755 Value *ReplacementValue =
nullptr;
1758 ReplacementValue = PrivTID;
1760 InsertPointOrErrorTy AfterIP =
1761 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1763 return AfterIP.takeError();
1764 Builder.restoreIP(*AfterIP);
1766 InnerAllocaIP.getBlock(),
1767 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1769 assert(ReplacementValue &&
1770 "Expected copy/create callback to set replacement value!");
1771 if (ReplacementValue == &V)
1776 UPtr->set(ReplacementValue);
1801 for (
Value *Output : Outputs)
1804 assert(Outputs.empty() &&
1805 "OpenMP outlining should not produce live-out values!");
1807 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1809 for (
auto *BB : Blocks)
1816 auto FiniInfo = FinalizationStack.pop_back_val();
1818 assert(FiniInfo.DK == OMPD_parallel &&
1819 "Unexpected finalization stack state!");
1823 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1824 if (
Error Err = FiniCB(PreFiniIP))
1828 addOutlineInfo(std::move(OI));
1830 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1831 UI->eraseFromParent();
1836void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1839 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1840 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1842 createRuntimeFunctionCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush),
1846void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1847 if (!updateToLocation(
Loc))
1852void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1856 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1857 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1858 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1861 createRuntimeFunctionCall(
1862 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), Args);
1865void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1866 if (!updateToLocation(
Loc))
1868 emitTaskwaitImpl(
Loc);
1871void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1874 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1875 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1877 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1879 createRuntimeFunctionCall(
1880 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), Args);
1883void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1884 if (!updateToLocation(
Loc))
1886 emitTaskyieldImpl(
Loc);
1895 OpenMPIRBuilder &OMPBuilder,
1898 if (Dependencies.
empty())
1918 Type *DependInfo = OMPBuilder.DependInfo;
1919 Module &M = OMPBuilder.M;
1921 Value *DepArray =
nullptr;
1922 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1923 Builder.SetInsertPoint(
1924 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1927 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1929 Builder.restoreIP(OldIP);
1931 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1933 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1935 Value *Addr = Builder.CreateStructGEP(
1937 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1938 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1939 Builder.CreateStore(DepValPtr, Addr);
1942 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1943 Builder.CreateStore(
1944 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1947 Value *Flags = Builder.CreateStructGEP(
1949 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1950 Builder.CreateStore(
1951 ConstantInt::get(Builder.getInt8Ty(),
1952 static_cast<unsigned int>(Dep.DepKind)),
1958OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1959 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1960 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
1964 if (!updateToLocation(
Loc))
1965 return InsertPointTy();
1968 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1969 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1986 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
1987 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
1989 splitBB(Builder,
true,
"task.alloca");
1991 InsertPointTy TaskAllocaIP =
1992 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
1993 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
1994 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
1998 OI.EntryBB = TaskAllocaBB;
1999 OI.OuterAllocaBB = AllocaIP.getBlock();
2000 OI.ExitBB = TaskExitBB;
2005 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
2007 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2008 Mergeable, Priority, EventHandle, TaskAllocaBB,
2009 ToBeDeleted](
Function &OutlinedFn)
mutable {
2012 "there must be a single user for the outlined function");
2017 bool HasShareds = StaleCI->
arg_size() > 1;
2018 Builder.SetInsertPoint(StaleCI);
2023 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2027 Value *ThreadID = getOrCreateThreadID(Ident);
2039 Value *Flags = Builder.getInt32(Tied);
2042 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2043 Flags = Builder.CreateOr(FinalFlag, Flags);
2047 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2049 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
2055 Value *TaskSize = Builder.getInt64(
2056 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2061 Value *SharedsSize = Builder.getInt64(0);
2065 assert(ArgStructAlloca &&
2066 "Unable to find the alloca instruction corresponding to arguments "
2067 "for extracted function");
2070 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2071 "arguments for extracted function");
2073 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
2078 CallInst *TaskData = createRuntimeFunctionCall(
2079 TaskAllocFn, {Ident, ThreadID, Flags,
2080 TaskSize, SharedsSize,
2087 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2088 OMPRTL___kmpc_task_allow_completion_event);
2090 createRuntimeFunctionCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2092 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2093 Builder.getPtrTy(0));
2094 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2095 Builder.CreateStore(EventVal, EventHandleAddr);
2101 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2102 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2120 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
2123 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2124 Value *PriorityData = Builder.CreateInBoundsGEP(
2125 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2128 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2130 Builder.CreateStore(Priority, CmplrData);
2155 splitBB(Builder,
true,
"if.end");
2157 Builder.GetInsertPoint()->
getParent()->getTerminator();
2158 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2159 Builder.SetInsertPoint(IfTerminator);
2162 Builder.SetInsertPoint(ElseTI);
2164 if (Dependencies.size()) {
2166 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2167 createRuntimeFunctionCall(
2169 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2170 ConstantInt::get(Builder.getInt32Ty(), 0),
2174 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2176 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2177 createRuntimeFunctionCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2180 CI = createRuntimeFunctionCall(&OutlinedFn, {ThreadID, TaskData});
2182 CI = createRuntimeFunctionCall(&OutlinedFn, {ThreadID});
2184 createRuntimeFunctionCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2185 Builder.SetInsertPoint(ThenTI);
2188 if (Dependencies.size()) {
2190 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2191 createRuntimeFunctionCall(
2193 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2194 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2199 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2200 createRuntimeFunctionCall(TaskFn, {Ident, ThreadID, TaskData});
2205 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2207 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2209 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2213 I->eraseFromParent();
2216 addOutlineInfo(std::move(OI));
2217 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2219 return Builder.saveIP();
2222OpenMPIRBuilder::InsertPointOrErrorTy
2223OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2224 InsertPointTy AllocaIP,
2225 BodyGenCallbackTy BodyGenCB) {
2226 if (!updateToLocation(
Loc))
2227 return InsertPointTy();
2230 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2231 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2232 Value *ThreadID = getOrCreateThreadID(Ident);
2236 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2237 createRuntimeFunctionCall(TaskgroupFn, {Ident, ThreadID});
2239 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2240 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
2243 Builder.SetInsertPoint(TaskgroupExitBB);
2246 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2247 createRuntimeFunctionCall(EndTaskgroupFn, {Ident, ThreadID});
2249 return Builder.saveIP();
2252OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2253 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2255 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2258 if (!updateToLocation(
Loc))
2264 auto FiniCBWrapper = [&](InsertPointTy IP) {
2273 CancellationBranches.
push_back(DummyBranch);
2277 FinalizationStack.
push_back({FiniCBWrapper, OMPD_sections, IsCancellable});
2295 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2296 Builder.restoreIP(CodeGenIP);
2298 splitBBWithSuffix(Builder,
false,
".sections.after");
2302 unsigned CaseNumber = 0;
2303 for (
auto SectionCB : SectionCBs) {
2305 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2306 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2307 Builder.SetInsertPoint(CaseBB);
2309 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
2321 Value *LB = ConstantInt::get(I32Ty, 0);
2322 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2323 Value *
ST = ConstantInt::get(I32Ty, 1);
2325 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2329 InsertPointOrErrorTy WsloopIP =
2330 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2331 WorksharingLoopType::ForStaticLoop, !IsNowait);
2333 return WsloopIP.takeError();
2334 InsertPointTy AfterIP = *WsloopIP;
2337 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
2340 auto FiniInfo = FinalizationStack.pop_back_val();
2341 assert(FiniInfo.DK == OMPD_sections &&
2342 "Unexpected finalization stack state!");
2343 if (FinalizeCallbackTy &CB = FiniInfo.FiniCB) {
2344 Builder.restoreIP(AfterIP);
2346 splitBBWithSuffix(Builder,
true,
"sections.fini");
2347 if (
Error Err = CB(Builder.saveIP()))
2349 AfterIP = {FiniBB, FiniBB->
begin()};
2353 for (
BranchInst *DummyBranch : CancellationBranches) {
2361OpenMPIRBuilder::InsertPointOrErrorTy
2362OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2363 BodyGenCallbackTy BodyGenCB,
2364 FinalizeCallbackTy FiniCB) {
2365 if (!updateToLocation(
Loc))
2368 auto FiniCBWrapper = [&](InsertPointTy IP) {
2379 Builder.restoreIP(IP);
2380 auto *CaseBB =
Loc.IP.getBlock();
2384 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2388 Directive OMPD = Directive::OMPD_sections;
2391 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2399 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
2402Value *OpenMPIRBuilder::getGPUThreadID() {
2403 return createRuntimeFunctionCall(
2404 getOrCreateRuntimeFunction(M,
2405 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2409Value *OpenMPIRBuilder::getGPUWarpSize() {
2410 return createRuntimeFunctionCall(
2411 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
2414Value *OpenMPIRBuilder::getNVPTXWarpID() {
2415 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2416 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
2419Value *OpenMPIRBuilder::getNVPTXLaneID() {
2420 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2421 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2422 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2423 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
2427Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2430 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2431 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2432 assert(FromSize > 0 &&
"From size must be greater than zero");
2433 assert(ToSize > 0 &&
"To size must be greater than zero");
2434 if (FromType == ToType)
2436 if (FromSize == ToSize)
2437 return Builder.CreateBitCast(From, ToType);
2439 return Builder.CreateIntCast(From, ToType,
true);
2440 InsertPointTy SaveIP = Builder.saveIP();
2441 Builder.restoreIP(AllocaIP);
2442 Value *CastItem = Builder.CreateAlloca(ToType);
2443 Builder.restoreIP(SaveIP);
2445 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2446 CastItem, Builder.getPtrTy(0));
2447 Builder.CreateStore(From, ValCastItem);
2448 return Builder.CreateLoad(ToType, CastItem);
2451Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2455 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2456 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2460 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2462 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
2463 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2464 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2465 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2466 Value *WarpSizeCast =
2467 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2468 Value *ShuffleCall =
2469 createRuntimeFunctionCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2470 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2473void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2477 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2488 Type *IndexTy = Builder.getIndexTy(
2489 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2490 Value *ElemPtr = DstAddr;
2491 Value *Ptr = SrcAddr;
2492 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2496 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2497 Ptr, Builder.getPtrTy(0), Ptr->
getName() +
".ascast");
2499 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2500 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2501 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
2504 if ((
Size / IntSize) > 1) {
2505 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2506 SrcAddrGEP, Builder.getPtrTy());
2511 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2512 emitBlock(PreCondBB, CurFunc);
2514 Builder.CreatePHI(Ptr->
getType(), 2);
2517 Builder.CreatePHI(ElemPtr->
getType(), 2);
2521 Value *PtrDiff = Builder.CreatePtrDiff(
2522 Builder.getInt8Ty(), PtrEnd,
2523 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr, Builder.getPtrTy()));
2524 Builder.CreateCondBr(
2525 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2527 emitBlock(ThenBB, CurFunc);
2528 Value *Res = createRuntimeShuffleFunction(
2530 Builder.CreateAlignedLoad(
2531 IntType, Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2533 Builder.CreateAlignedStore(Res, ElemPtr,
2534 M.getDataLayout().getPrefTypeAlign(ElemType));
2536 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2537 Value *LocalElemPtr =
2538 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2541 emitBranch(PreCondBB);
2542 emitBlock(ExitBB, CurFunc);
2544 Value *Res = createRuntimeShuffleFunction(
2545 AllocaIP, Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
2548 Res = Builder.CreateTrunc(Res, ElemType);
2549 Builder.CreateStore(Res, ElemPtr);
2550 Ptr = Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2552 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2558Error OpenMPIRBuilder::emitReductionListCopy(
2559 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2562 Type *IndexTy = Builder.getIndexTy(
2563 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2564 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2568 for (
auto En :
enumerate(ReductionInfos)) {
2569 const ReductionInfo &RI = En.value();
2570 Value *SrcElementAddr =
nullptr;
2572 Value *DestElementAddr =
nullptr;
2573 Value *DestElementPtrAddr =
nullptr;
2575 bool ShuffleInElement =
false;
2578 bool UpdateDestListPtr =
false;
2581 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2582 ReductionArrayTy, SrcBase,
2583 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2584 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2588 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2589 ReductionArrayTy, DestBase,
2590 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2591 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
2593 case CopyAction::RemoteLaneToThread: {
2594 InsertPointTy CurIP = Builder.saveIP();
2595 Builder.restoreIP(AllocaIP);
2597 Type *DestAllocaType =
2598 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
2599 DestAlloca = Builder.CreateAlloca(DestAllocaType,
nullptr,
2600 ".omp.reduction.element");
2602 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
2603 DestElementAddr = DestAlloca;
2605 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2606 DestElementAddr->
getName() +
".ascast");
2607 Builder.restoreIP(CurIP);
2608 ShuffleInElement =
true;
2609 UpdateDestListPtr =
true;
2612 case CopyAction::ThreadCopy: {
2614 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2621 if (ShuffleInElement) {
2622 Type *ShuffleType = RI.ElementType;
2623 Value *ShuffleSrcAddr = SrcElementAddr;
2624 Value *ShuffleDestAddr = DestElementAddr;
2628 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
2629 assert(RI.ByRefAllocatedType &&
2630 "Expected by-ref allocated type to be set");
2635 ShuffleType = RI.ByRefElementType;
2637 InsertPointOrErrorTy GenResult =
2638 RI.DataPtrPtrGen(Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
2641 return GenResult.takeError();
2643 ShuffleSrcAddr = Builder.CreateLoad(Builder.getPtrTy(), ShuffleSrcAddr);
2646 InsertPointTy OldIP = Builder.saveIP();
2647 Builder.restoreIP(AllocaIP);
2649 LocalStorage = Builder.CreateAlloca(ShuffleType);
2650 Builder.restoreIP(OldIP);
2651 ShuffleDestAddr = LocalStorage;
2655 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
2656 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
2660 InsertPointOrErrorTy GenResult =
2661 RI.DataPtrPtrGen(Builder.saveIP(),
2662 Builder.CreatePointerBitCastOrAddrSpaceCast(
2663 DestAlloca, Builder.getPtrTy(),
".ascast"),
2667 return GenResult.takeError();
2669 Builder.CreateStore(Builder.CreatePointerBitCastOrAddrSpaceCast(
2670 LocalStorage, Builder.getPtrTy(),
".ascast"),
2674 switch (RI.EvaluationKind) {
2675 case EvalKind::Scalar: {
2676 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2678 Builder.CreateStore(Elem, DestElementAddr);
2681 case EvalKind::Complex: {
2682 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2683 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2684 Value *SrcReal = Builder.CreateLoad(
2685 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2686 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2687 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2688 Value *SrcImg = Builder.CreateLoad(
2689 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2691 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2692 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2693 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2694 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2695 Builder.CreateStore(SrcReal, DestRealPtr);
2696 Builder.CreateStore(SrcImg, DestImgPtr);
2699 case EvalKind::Aggregate: {
2700 Value *SizeVal = Builder.getInt64(
2701 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2702 Builder.CreateMemCpy(
2703 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2704 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2716 if (UpdateDestListPtr) {
2717 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2718 DestElementAddr, Builder.getPtrTy(),
2719 DestElementAddr->
getName() +
".ascast");
2720 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
2730 InsertPointTy SavedIP = Builder.saveIP();
2733 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2737 "_omp_reduction_inter_warp_copy_func", &M);
2742 Builder.SetInsertPoint(EntryBB);
2760 "__openmp_nvptx_data_transfer_temporary_storage";
2761 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2762 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2764 if (!TransferMedium) {
2773 Value *GPUThreadID = getGPUThreadID();
2775 Value *LaneID = getNVPTXLaneID();
2777 Value *WarpID = getNVPTXWarpID();
2779 InsertPointTy AllocaIP =
2780 InsertPointTy(Builder.GetInsertBlock(),
2781 Builder.GetInsertBlock()->getFirstInsertionPt());
2784 Builder.restoreIP(AllocaIP);
2785 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2786 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2788 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2789 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2790 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2791 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2792 NumWarpsAlloca, Builder.getPtrTy(0),
2793 NumWarpsAlloca->
getName() +
".ascast");
2794 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2795 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2797 InsertPointTy CodeGenIP =
2799 Builder.restoreIP(CodeGenIP);
2802 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
2804 for (
auto En :
enumerate(ReductionInfos)) {
2809 const ReductionInfo &RI = En.value();
2810 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
2811 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(
2812 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
2813 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2816 unsigned NumIters = RealTySize / TySize;
2819 Value *Cnt =
nullptr;
2820 Value *CntAddr =
nullptr;
2824 CodeGenIP = Builder.saveIP();
2825 Builder.restoreIP(AllocaIP);
2827 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2829 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2830 CntAddr->
getName() +
".ascast");
2831 Builder.restoreIP(CodeGenIP);
2838 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2839 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2841 Value *
Cmp = Builder.CreateICmpULT(
2842 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2843 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2844 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2848 InsertPointOrErrorTy BarrierIP1 =
2849 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2850 omp::Directive::OMPD_unknown,
2854 return BarrierIP1.takeError();
2860 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2861 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2862 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2865 auto *RedListArrayTy =
2867 Type *IndexTy = Builder.getIndexTy(
2868 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2870 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2871 {ConstantInt::get(IndexTy, 0),
2872 ConstantInt::get(IndexTy, En.index())});
2874 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2877 InsertPointOrErrorTy GenRes =
2878 RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
2881 return GenRes.takeError();
2883 ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtr);
2887 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2891 Value *MediumPtr = Builder.CreateInBoundsGEP(
2892 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2895 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2897 Builder.CreateStore(Elem, MediumPtr,
2899 Builder.CreateBr(MergeBB);
2902 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2903 Builder.CreateBr(MergeBB);
2906 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2907 InsertPointOrErrorTy BarrierIP2 =
2908 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2909 omp::Directive::OMPD_unknown,
2913 return BarrierIP2.takeError();
2920 Value *NumWarpsVal =
2921 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2923 Value *IsActiveThread =
2924 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2925 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2927 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2931 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2932 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2934 Value *TargetElemPtrPtr =
2935 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2936 {ConstantInt::get(IndexTy, 0),
2937 ConstantInt::get(IndexTy, En.index())});
2938 Value *TargetElemPtrVal =
2939 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2940 Value *TargetElemPtr = TargetElemPtrVal;
2943 InsertPointOrErrorTy GenRes =
2944 RI.DataPtrPtrGen(Builder.saveIP(), TargetElemPtr, TargetElemPtr);
2947 return GenRes.takeError();
2949 TargetElemPtr = Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtr);
2954 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2957 Value *SrcMediumValue =
2958 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2959 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2960 Builder.CreateBr(W0MergeBB);
2962 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2963 Builder.CreateBr(W0MergeBB);
2965 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2968 Cnt = Builder.CreateNSWAdd(
2969 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2970 Builder.CreateStore(Cnt, CntAddr,
false);
2972 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2973 emitBranch(PrecondBB);
2974 emitBlock(ExitBB, CurFn);
2976 RealTySize %= TySize;
2980 Builder.CreateRetVoid();
2981 Builder.restoreIP(SavedIP);
2992 {Builder.getPtrTy(), Builder.getInt16Ty(),
2993 Builder.getInt16Ty(), Builder.getInt16Ty()},
2997 "_omp_reduction_shuffle_and_reduce_func", &M);
3007 Builder.SetInsertPoint(EntryBB);
3018 Type *ReduceListArgType = ReduceListArg->
getType();
3020 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
3021 Value *ReduceListAlloca = Builder.CreateAlloca(
3022 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3023 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
3024 LaneIDArg->
getName() +
".addr");
3025 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
3026 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3027 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
3028 AlgoVerArg->
getName() +
".addr");
3034 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
3035 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3037 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3038 ReduceListAlloca, ReduceListArgType,
3039 ReduceListAlloca->
getName() +
".ascast");
3040 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3041 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3042 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3043 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3044 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3045 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3046 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3047 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3048 RemoteReductionListAlloca, Builder.getPtrTy(),
3049 RemoteReductionListAlloca->
getName() +
".ascast");
3051 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3052 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3053 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3054 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3056 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3057 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3058 Value *RemoteLaneOffset =
3059 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3060 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3067 Error EmitRedLsCpRes = emitReductionListCopy(
3068 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
3069 ReduceList, RemoteListAddrCast, IsByRef,
3070 {RemoteLaneOffset,
nullptr,
nullptr});
3073 return EmitRedLsCpRes;
3096 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
3097 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3098 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3099 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
3100 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
3101 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
3102 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
3103 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
3104 Value *RemoteOffsetComp =
3105 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
3106 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3107 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3108 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3114 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3115 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3116 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3117 ReduceList, Builder.getPtrTy());
3118 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3119 RemoteListAddrCast, Builder.getPtrTy());
3120 createRuntimeFunctionCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3121 ->addFnAttr(Attribute::NoUnwind);
3122 Builder.CreateBr(MergeBB);
3124 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3125 Builder.CreateBr(MergeBB);
3127 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3131 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3132 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3133 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3138 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3140 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3142 EmitRedLsCpRes = emitReductionListCopy(
3143 AllocaIP, CopyAction::ThreadCopy, RedListArrayTy, ReductionInfos,
3144 RemoteListAddrCast, ReduceList, IsByRef);
3147 return EmitRedLsCpRes;
3149 Builder.CreateBr(CpyMergeBB);
3151 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3152 Builder.CreateBr(CpyMergeBB);
3154 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3156 Builder.CreateRetVoid();
// Emits "_omp_reduction_list_to_global_copy_func": a void(ptr buffer, i32 idx,
// ptr reduce_list) helper that copies every element of a thread-local
// reduction list into the team-wide reductions buffer, element-for-element.
// NOTE(review): the leading numerals on each line and the jumps in them
// (3164 -> 3167, 3172 -> 3179, ...) are extraction artifacts — original
// source lines are missing from this view; code tokens are left untouched.
3161Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
3163 AttributeList FuncAttrs) {
3164 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3167 Builder.getVoidTy(),
3168 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3172 "_omp_reduction_list_to_global_copy_func", &M);
3179 Builder.SetInsertPoint(EntryBlock);
// Spill the three incoming arguments to allocas, then address-space-cast the
// alloca pointers to generic pointers before use (GPU allocas may live in a
// non-default address space).
3188 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3189 BufferArg->
getName() +
".addr");
3190 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3192 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3193 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3194 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3195 BufferArgAlloca, Builder.getPtrTy(),
3196 BufferArgAlloca->
getName() +
".ascast");
3197 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3198 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3199 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3200 ReduceListArgAlloca, Builder.getPtrTy(),
3201 ReduceListArgAlloca->
getName() +
".ascast");
3203 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3204 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3205 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3207 Value *LocalReduceList =
3208 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3209 Value *BufferArgVal =
3210 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3211 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3212 Type *IndexTy = Builder.getIndexTy(
3213 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// For each reduction variable: load its slot from the local reduce list and
// compute the matching slot inside the global reductions buffer (indexed by
// the runtime-provided buffer index, then by the field number).
3214 for (
auto En :
enumerate(ReductionInfos)) {
3215 const ReductionInfo &RI = En.value();
3216 auto *RedListArrayTy =
3219 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3220 RedListArrayTy, LocalReduceList,
3221 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3223 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3227 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3228 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3229 ReductionsBufferTy, BufferVD, 0, En.index());
// Copy list element -> buffer slot; strategy depends on the element's
// evaluation kind.
3231 switch (RI.EvaluationKind) {
3232 case EvalKind::Scalar: {
3233 Value *TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3234 Builder.CreateStore(TargetElement, GlobVal);
// Complex values are copied as two scalar fields: {0 = real, 1 = imag}.
3237 case EvalKind::Complex: {
3238 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3239 RI.ElementType, ElemPtr, 0, 0,
".realp");
3240 Value *SrcReal = Builder.CreateLoad(
3241 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3242 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3243 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3244 Value *SrcImg = Builder.CreateLoad(
3245 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3247 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3248 RI.ElementType, GlobVal, 0, 0,
".realp");
3249 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3250 RI.ElementType, GlobVal, 0, 1,
".imagp");
3251 Builder.CreateStore(SrcReal, DestRealPtr);
3252 Builder.CreateStore(SrcImg, DestImgPtr);
// Aggregates are copied wholesale with a memcpy of the type's store size.
3255 case EvalKind::Aggregate: {
3257 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3258 Builder.CreateMemCpy(
3259 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3260 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
// Emit the terminator and restore the caller's insertion point.
3266 Builder.CreateRetVoid();
3267 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_list_to_global_reduce_func": builds a temporary
// reduce list whose slots point INTO the global reductions buffer, then calls
// ReduceFn(buffer-backed-list, incoming-list) so the shared reduction
// combiner folds the thread-local values into the buffer.
// NOTE(review): embedded numerals / numbering gaps are extraction artifacts;
// original lines are missing from this view. Tokens left untouched.
3271Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3273 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3274 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3277 Builder.getVoidTy(),
3278 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3282 "_omp_reduction_list_to_global_reduce_func", &M);
3289 Builder.SetInsertPoint(EntryBlock);
// Spill arguments to allocas (same pattern as the sibling copy functions).
3298 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3299 BufferArg->
getName() +
".addr");
3300 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3302 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3303 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
// Local scratch reduce list that will be filled with pointers into the
// global buffer below.
3304 auto *RedListArrayTy =
3309 Value *LocalReduceList =
3310 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3312 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3313 BufferArgAlloca, Builder.getPtrTy(),
3314 BufferArgAlloca->
getName() +
".ascast");
3315 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3316 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3317 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3318 ReduceListArgAlloca, Builder.getPtrTy(),
3319 ReduceListArgAlloca->
getName() +
".ascast");
3320 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3321 LocalReduceList, Builder.getPtrTy(),
3322 LocalReduceList->
getName() +
".ascast");
3324 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3325 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3326 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3328 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3329 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3330 Type *IndexTy = Builder.getIndexTy(
3331 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Point each slot of the scratch list at the corresponding field of the
// indexed entry in the global reductions buffer.
3332 for (
auto En :
enumerate(ReductionInfos)) {
3333 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3334 RedListArrayTy, LocalReduceListAddrCast,
3335 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3337 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3339 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3340 ReductionsBufferTy, BufferVD, 0, En.index());
3341 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Invoke the shared reduction function; it must not unwind.
3346 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3347 createRuntimeFunctionCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3348 ->addFnAttr(Attribute::NoUnwind);
3349 Builder.CreateRetVoid();
3350 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_global_to_list_copy_func": the inverse of the
// list-to-global copy — reads each field out of the global reductions buffer
// and stores it into the thread-local reduce list.
// NOTE(review): embedded numerals / numbering gaps are extraction artifacts;
// original lines are missing from this view. Tokens left untouched.
3354Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3356 AttributeList FuncAttrs) {
3357 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3360 Builder.getVoidTy(),
3361 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3365 "_omp_reduction_global_to_list_copy_func", &M);
3372 Builder.SetInsertPoint(EntryBlock);
// Argument spill + address-space cast boilerplate (matches the sibling
// reduction helper functions).
3381 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3382 BufferArg->
getName() +
".addr");
3383 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3385 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3386 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3387 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3388 BufferArgAlloca, Builder.getPtrTy(),
3389 BufferArgAlloca->
getName() +
".ascast");
3390 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3391 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3392 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3393 ReduceListArgAlloca, Builder.getPtrTy(),
3394 ReduceListArgAlloca->
getName() +
".ascast");
3395 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3396 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3397 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3399 Value *LocalReduceList =
3400 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3401 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3402 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3403 Type *IndexTy = Builder.getIndexTy(
3404 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Per reduction variable: compute the list slot and the buffer slot, then
// copy buffer -> list (note the direction is reversed w.r.t. the
// list-to-global variant).
3405 for (
auto En :
enumerate(ReductionInfos)) {
3406 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3407 auto *RedListArrayTy =
3410 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3411 RedListArrayTy, LocalReduceList,
3412 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3414 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3417 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3418 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3419 ReductionsBufferTy, BufferVD, 0, En.index());
3421 switch (RI.EvaluationKind) {
3422 case EvalKind::Scalar: {
3423 Value *TargetElement = Builder.CreateLoad(RI.ElementType, GlobValPtr);
3424 Builder.CreateStore(TargetElement, ElemPtr);
// Complex: copy the {real, imag} struct fields individually.
3427 case EvalKind::Complex: {
3428 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3429 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3430 Value *SrcReal = Builder.CreateLoad(
3431 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3432 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3433 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3434 Value *SrcImg = Builder.CreateLoad(
3435 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3437 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3438 RI.ElementType, ElemPtr, 0, 0,
".realp");
3439 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3440 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3441 Builder.CreateStore(SrcReal, DestRealPtr);
3442 Builder.CreateStore(SrcImg, DestImgPtr);
// Aggregate: bulk memcpy with the element type's store size.
3445 case EvalKind::Aggregate: {
3447 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3448 Builder.CreateMemCpy(
3449 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3450 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3457 Builder.CreateRetVoid();
3458 Builder.restoreIP(OldIP);
// Emits "_omp_reduction_global_to_list_reduce_func": fills a scratch reduce
// list with pointers into the global reductions buffer, then calls
// ReduceFn(incoming-list, buffer-backed-list) — i.e. the buffer acts as the
// RHS, mirroring emitListToGlobalReduceFunction with swapped operands.
// NOTE(review): embedded numerals / numbering gaps are extraction artifacts;
// original lines are missing from this view. Tokens left untouched.
3462Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3464 Type *ReductionsBufferTy, AttributeList FuncAttrs) {
3465 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3468 Builder.getVoidTy(),
3469 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3473 "_omp_reduction_global_to_list_reduce_func", &M);
3480 Builder.SetInsertPoint(EntryBlock);
// Argument spill boilerplate.
3489 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3490 BufferArg->
getName() +
".addr");
3491 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3493 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3494 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
// Scratch list whose slots will alias the global buffer entry.
3500 Value *LocalReduceList =
3501 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3503 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3504 BufferArgAlloca, Builder.getPtrTy(),
3505 BufferArgAlloca->
getName() +
".ascast");
3506 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3507 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3508 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3509 ReduceListArgAlloca, Builder.getPtrTy(),
3510 ReduceListArgAlloca->
getName() +
".ascast");
3511 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3512 LocalReduceList, Builder.getPtrTy(),
3513 LocalReduceList->
getName() +
".ascast");
3515 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3516 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3517 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3519 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3520 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3521 Type *IndexTy = Builder.getIndexTy(
3522 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// Point each scratch-list slot at the matching field of the indexed buffer
// entry.
3523 for (
auto En :
enumerate(ReductionInfos)) {
3524 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3525 RedListArrayTy, ReductionList,
3526 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3529 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3530 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3531 ReductionsBufferTy, BufferVD, 0, En.index());
3532 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
// Call the shared reducer (non-unwinding): incoming list is the LHS here.
3537 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3538 createRuntimeFunctionCall(ReduceFn, {ReduceList, ReductionList})
3539 ->addFnAttr(Attribute::NoUnwind);
3540 Builder.CreateRetVoid();
3541 Builder.restoreIP(OldIP);
// Builds the mangled name of a reduction function for \p Name by appending a
// platform-specific "omp.reduction.reduction_func" suffix.
// NOTE(review): closing brace not visible here — lines dropped by extraction.
3545std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3546 std::string Suffix =
3547 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
3548 return (Name + Suffix).
str();
// NOTE(review): the defining signature of this function was dropped by
// extraction — presumably this is the reduction-function emitter that builds
// a void(ptr lhs_list, ptr rhs_list) combiner; confirm against the full file.
// Embedded numerals / numbering gaps are extraction artifacts; tokens are
// left untouched.
3554 AttributeList FuncAttrs) {
3556 {Builder.getPtrTy(), Builder.getPtrTy()},
3558 std::string
Name = getReductionFuncName(ReducerName);
3566 Builder.SetInsertPoint(EntryBB);
3570 Value *LHSArrayPtr =
nullptr;
3571 Value *RHSArrayPtr =
nullptr;
// Spill both list arguments and cast the allocas before loading them back.
3578 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3580 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3581 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3582 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3583 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3584 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3585 Builder.CreateStore(Arg0, LHSAddrCast);
3586 Builder.CreateStore(Arg1, RHSAddrCast);
3587 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3588 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3591 Type *IndexTy = Builder.getIndexTy(
3592 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
// For each reduction variable: fetch the element pointers from both lists,
// then either record them for Clang-style fixup or invoke ReductionGen.
3594 for (
auto En :
enumerate(ReductionInfos)) {
3595 const ReductionInfo &RI = En.value();
3596 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3597 RedArrayTy, RHSArrayPtr,
3598 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3599 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3600 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3601 RHSI8Ptr, RI.PrivateVariable->getType(),
3602 RHSI8Ptr->
getName() +
".ascast");
3604 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3605 RedArrayTy, LHSArrayPtr,
3606 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3607 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3608 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3609 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3611 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
// By-value reductions load the operands before calling the generator;
// by-ref reductions pass the pointers through untouched.
3618 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
3619 LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3620 RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3624 InsertPointOrErrorTy AfterIP =
3625 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3627 return AfterIP.takeError();
3628 if (!Builder.GetInsertBlock())
3629 return ReductionFunc;
3631 Builder.restoreIP(*AfterIP);
3633 if (!IsByRef.
empty() && !IsByRef[En.index()])
3634 Builder.CreateStore(Reduced, LHSPtr);
// Clang-style generators run afterwards and patch the placeholder LHS/RHS
// pointers they produced, replacing uses inside this function only.
3638 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3639 for (
auto En :
enumerate(ReductionInfos)) {
3640 unsigned Index = En.index();
3641 const ReductionInfo &RI = En.value();
3642 Value *LHSFixupPtr, *RHSFixupPtr;
3643 Builder.restoreIP(RI.ReductionGenClang(
3644 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3649 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3654 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3660 Builder.CreateRetVoid();
3661 return ReductionFunc;
// NOTE(review): enclosing function header dropped by extraction — this loop
// sanity-checks every ReductionInfo before codegen (non-null variables,
// a generator callback, matching pointer types). Confirm the owner against
// the full file.
3667 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3669 assert(RI.Variable &&
"expected non-null variable");
3670 assert(RI.PrivateVariable &&
"expected non-null private variable");
3671 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3672 "expected non-null reduction generator callback");
3675 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3676 "expected variables and their private equivalents to have the same "
3679 assert(RI.Variable->getType()->isPointerTy() &&
3680 "expected variables to be pointers");
// GPU (device) reduction lowering: packs the private reduction variables into
// a reduce list, emits the shuffle/inter-warp helper functions, and calls the
// device runtime — __kmpc_nvptx_parallel_reduce_nowait_v2 for parallel
// reductions or __kmpc_nvptx_teams_reduce_nowait_v2 (plus the four
// list<->global helpers) for teams reductions — then folds the winning
// thread's values back into the original variables.
// NOTE(review): embedded numerals / numbering gaps are extraction artifacts;
// original lines are missing from this view. Tokens left untouched.
3684OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
3685 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3688 ReductionGenCBKind ReductionGenCBKind, std::optional<omp::GV> GridValue,
3689 unsigned ReductionBufNum,
Value *SrcLocInfo) {
3690 if (!updateToLocation(
Loc))
3691 return InsertPointTy();
3692 Builder.restoreIP(CodeGenIP);
3699 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3700 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Nothing to reduce: return the current point unchanged.
3703 if (ReductionInfos.
size() == 0)
3704 return Builder.saveIP();
3707 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3713 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
// Attributes for the emitted helper functions; OptimizeNone is stripped so
// they can be optimized even in -O0-style pipelines.
3717 AttributeList FuncAttrs;
3718 AttrBuilder AttrBldr(Ctx);
3720 AttrBldr.addAttribute(Attr);
3721 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3722 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
3724 CodeGenIP = Builder.saveIP();
3726 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
3727 ReductionGenCBKind, FuncAttrs);
3728 if (!ReductionResult)
3730 Function *ReductionFunc = *ReductionResult;
3731 Builder.restoreIP(CodeGenIP);
3734 if (GridValue.has_value())
3735 Config.setGridValue(GridValue.value());
3750 Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
// Allocate the reduce list in the alloca block, then resume codegen and
// populate it with (cast) pointers to the private copies. By-value elements
// are loaded first so the list holds the value's storage, not its address.
3752 CodeGenIP = Builder.saveIP();
3753 Builder.restoreIP(AllocaIP);
3754 Value *ReductionListAlloca =
3755 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3756 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3757 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3758 Builder.restoreIP(CodeGenIP);
3759 Type *IndexTy = Builder.getIndexTy(
3760 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3761 for (
auto En :
enumerate(ReductionInfos)) {
3762 const ReductionInfo &RI = En.value();
3763 Value *ElemPtr = Builder.CreateInBoundsGEP(
3764 RedArrayTy, ReductionList,
3765 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3767 Value *PrivateVar = RI.PrivateVariable;
3768 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3770 PrivateVar = Builder.CreateLoad(RI.ElementType, PrivateVar);
3773 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
3774 Builder.CreateStore(CastElem, ElemPtr);
// Emit the shuffle-and-reduce and inter-warp copy helpers.
3776 CodeGenIP = Builder.saveIP();
3778 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
3784 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
3788 Builder.restoreIP(CodeGenIP);
3790 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
// Compute the runtime's data-size argument from the largest element type.
3792 unsigned MaxDataSize = 0;
3794 for (
auto En :
enumerate(ReductionInfos)) {
3795 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3796 if (
Size > MaxDataSize)
3798 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3800 Value *ReductionDataSize =
3801 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
// Parallel reduction path: single runtime call with the two helpers.
3802 if (!IsTeamsReduction) {
3803 Value *SarFuncCast =
3804 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
3806 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
3807 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3809 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3810 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3811 Res = createRuntimeFunctionCall(Pv2Ptr, Args);
// Teams reduction path: also needs the fixed reductions buffer plus the four
// list<->global copy/reduce helper functions.
3813 CodeGenIP = Builder.saveIP();
3815 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3816 Function *RedFixedBuferFn = getOrCreateRuntimeFunctionPtr(
3817 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3818 Function *LtGCFunc = emitListToGlobalCopyFunction(
3819 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3820 Function *LtGRFunc = emitListToGlobalReduceFunction(
3821 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3822 Function *GtLCFunc = emitGlobalToListCopyFunction(
3823 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3824 Function *GtLRFunc = emitGlobalToListReduceFunction(
3825 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3826 Builder.restoreIP(CodeGenIP);
3828 Value *KernelTeamsReductionPtr = createRuntimeFunctionCall(
3829 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3831 Value *Args3[] = {SrcLocInfo,
3832 KernelTeamsReductionPtr,
3833 Builder.getInt32(ReductionBufNum),
3843 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3844 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3845 Res = createRuntimeFunctionCall(TeamsReduceFn, Args3);
// The runtime returns 1 on the thread that must commit the reduced values;
// branch into ThenBB to fold them into the original variables.
3851 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3852 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3858 emitBlock(ThenBB, CurFunc);
3861 for (
auto En :
enumerate(ReductionInfos)) {
3862 const ReductionInfo &RI = En.value();
3864 Value *RedValue = RI.Variable;
3866 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3868 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3869 Value *LHSPtr, *RHSPtr;
3870 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3871 &LHSPtr, &RHSPtr, CurFunc));
// Non-Clang path: by-value elements are loaded, combined via ReductionGen,
// and stored back; by-ref elements are combined through their pointers.
3884 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3885 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3886 "red.value." +
Twine(En.index()));
3888 Value *PrivateRedValue = Builder.CreateLoad(
3891 InsertPointOrErrorTy AfterIP =
3892 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
3894 return AfterIP.takeError();
3895 Builder.restoreIP(*AfterIP);
3897 if (!IsByRef.
empty() && !IsByRef[En.index()])
3898 Builder.CreateStore(Reduced, RI.Variable);
3901 emitBlock(ExitBB, CurFunc);
3902 if (ContinuationBlock) {
3903 Builder.CreateBr(ContinuationBlock);
3904 Builder.SetInsertPoint(ContinuationBlock);
3906 Config.setEmitLLVMUsed();
3908 return Builder.saveIP();
// NOTE(review): the enclosing function's header was dropped by extraction —
// this appears to populate the host-side ".omp.reduction.func" combiner
// (void(ptr lhs_list, ptr rhs_list)); confirm against the full file.
// Embedded numerals / numbering gaps are extraction artifacts; tokens are
// left untouched.
3917 ".omp.reduction.func", &M);
3927 Builder.SetInsertPoint(ReductionFuncBlock);
3928 Value *LHSArrayPtr =
nullptr;
3929 Value *RHSArrayPtr =
nullptr;
// One path spills the two list arguments via allocas + addrspace casts;
// the other (the condition is not visible here) uses the raw arguments.
3940 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3942 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3943 Value *LHSAddrCast =
3944 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
3945 Value *RHSAddrCast =
3946 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
3947 Builder.CreateStore(Arg0, LHSAddrCast);
3948 Builder.CreateStore(Arg1, RHSAddrCast);
3949 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3950 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3952 LHSArrayPtr = ReductionFunc->
getArg(0);
3953 RHSArrayPtr = ReductionFunc->
getArg(1);
3956 unsigned NumReductions = ReductionInfos.
size();
// For each variable: load LHS and RHS element values out of the two lists
// and combine them with the user-provided ReductionGen callback.
3959 for (
auto En :
enumerate(ReductionInfos)) {
3960 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3961 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3962 RedArrayTy, LHSArrayPtr, 0, En.index());
3963 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3964 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3965 LHSI8Ptr, RI.Variable->
getType());
3966 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3967 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
3968 RedArrayTy, RHSArrayPtr, 0, En.index());
3969 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3970 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3971 RHSI8Ptr, RI.PrivateVariable->
getType());
3972 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3974 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
3975 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3977 return AfterIP.takeError();
3979 Builder.restoreIP(*AfterIP);
3981 if (!Builder.GetInsertBlock())
// By-value reductions write the combined result back into the LHS slot.
3985 if (!IsByRef[En.index()])
3986 Builder.CreateStore(Reduced, LHSPtr);
3988 Builder.CreateRetVoid();
// Host-side reduction lowering. Delegates to createReductionsGPU when
// appropriate (condition not visible here); otherwise packs the private
// variables into "red.array", calls __kmpc_reduce(_nowait), and switches on
// the result: 1 = non-atomic combine + __kmpc_end_reduce(_nowait),
// 2 = per-variable atomic combine (only when every info has an
// AtomicReductionGen and nothing is by-ref).
// NOTE(review): embedded numerals / numbering gaps are extraction artifacts;
// original lines are missing from this view. Tokens left untouched.
3992OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
3993 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3995 bool IsNoWait,
bool IsTeamsReduction) {
3998 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
3999 IsByRef, IsNoWait, IsTeamsReduction);
4003 if (!updateToLocation(
Loc))
4004 return InsertPointTy();
// Nothing to reduce: return current point unchanged.
4006 if (ReductionInfos.
size() == 0)
4007 return Builder.saveIP();
4016 unsigned NumReductions = ReductionInfos.
size();
// red.array lives in the alloca block; each slot stores the address of one
// private reduction variable.
4018 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
4019 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4021 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4023 for (
auto En :
enumerate(ReductionInfos)) {
4024 unsigned Index = En.index();
4025 const ReductionInfo &RI = En.value();
4026 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
4027 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4028 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
4033 Type *IndexTy = Builder.getIndexTy(
4034 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
4035 Function *
Func = Builder.GetInsertBlock()->getParent();
4038 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
// The ATOMIC_REDUCE ident flag is only set when every info can reduce
// atomically.
4039 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
4040 return RI.AtomicReductionGen;
4042 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
4044 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4046 Value *ThreadId = getOrCreateThreadID(Ident);
4047 Constant *NumVariables = Builder.getInt32(NumReductions);
4049 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4050 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4052 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4053 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
4054 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4055 : RuntimeFunction::OMPRTL___kmpc_reduce);
4057 createRuntimeFunctionCall(ReduceFunc,
4058 {Ident, ThreadId, NumVariables, RedArraySize,
4059 RedArray, ReductionFunc, Lock},
// Dispatch on the runtime's answer: default -> continuation (no reduction
// work for this thread), 1 -> non-atomic block, 2 -> atomic block.
4070 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4071 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
4072 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
// Non-atomic path: load (or pass by-ref), combine via ReductionGen, store
// back by-value results, then signal the runtime and branch onward.
4077 Builder.SetInsertPoint(NonAtomicRedBlock);
4078 for (
auto En :
enumerate(ReductionInfos)) {
4079 const ReductionInfo &RI = En.value();
4083 Value *RedValue = RI.Variable;
4084 if (!IsByRef[En.index()]) {
4085 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
4086 "red.value." +
Twine(En.index()));
4088 Value *PrivateRedValue =
4089 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
4090 "red.private.value." +
Twine(En.index()));
4092 InsertPointOrErrorTy AfterIP =
4093 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
4095 return AfterIP.takeError();
4096 Builder.restoreIP(*AfterIP);
4098 if (!Builder.GetInsertBlock())
4099 return InsertPointTy();
4101 if (!IsByRef[En.index()])
4102 Builder.CreateStore(Reduced, RI.Variable);
4104 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
4105 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4106 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4107 createRuntimeFunctionCall(EndReduceFunc, {Ident, ThreadId, Lock});
4108 Builder.CreateBr(ContinuationBlock);
// Atomic path: only valid when every info is atomic-capable and nothing is
// by-ref; otherwise the block is unreachable by construction.
4113 Builder.SetInsertPoint(AtomicRedBlock);
4114 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4115 for (
const ReductionInfo &RI : ReductionInfos) {
4116 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
4117 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
4119 return AfterIP.takeError();
4120 Builder.restoreIP(*AfterIP);
4121 if (!Builder.GetInsertBlock())
4122 return InsertPointTy();
4124 Builder.CreateBr(ContinuationBlock);
4126 Builder.CreateUnreachable();
4137 if (!Builder.GetInsertBlock())
4138 return InsertPointTy();
4140 Builder.SetInsertPoint(ContinuationBlock);
4141 return Builder.saveIP();
// Emits an OpenMP 'master' region: guards the body with
// __kmpc_master / __kmpc_end_master and delegates region construction to
// EmitOMPInlinedRegion.
// NOTE(review): embedded numerals / numbering gaps are extraction artifacts.
4144OpenMPIRBuilder::InsertPointOrErrorTy
4145OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4146 BodyGenCallbackTy BodyGenCB,
4147 FinalizeCallbackTy FiniCB) {
4148 if (!updateToLocation(
Loc))
4151 Directive OMPD = Directive::OMPD_master;
4153 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4154 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4155 Value *ThreadId = getOrCreateThreadID(Ident);
4158 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4159 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
4161 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4162 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
4164 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Emits an OpenMP 'masked' region: guards the body with __kmpc_masked /
// __kmpc_end_masked. Note the end call takes only {Ident, ThreadId} (ArgsEnd)
// — the entry call's filter argument (not visible here) is not passed to the
// exit runtime function.
// NOTE(review): embedded numerals / numbering gaps are extraction artifacts.
4168OpenMPIRBuilder::InsertPointOrErrorTy
4169OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4170 BodyGenCallbackTy BodyGenCB,
4172 if (!updateToLocation(
Loc))
4175 Directive OMPD = Directive::OMPD_masked;
4177 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4178 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4179 Value *ThreadId = getOrCreateThreadID(Ident);
4181 Value *ArgsEnd[] = {Ident, ThreadId};
4183 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4184 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
4186 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4187 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, ArgsEnd);
4189 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4199 Call->setDoesNotThrow();
// Emit the bookkeeping for an OpenMP `scan` directive inside a scan loop nest.
// The directive is lowered as two loop passes (see emitScanBasedDirectiveIR):
// during the first ("input") pass each iteration's private scan variable is
// copied into a per-iteration slot of a temporary buffer; during the second
// ("scan") pass the prefix value is read back from the buffer. The branch at
// the end routes control to the before/after-scan blocks depending on whether
// the scan is inclusive or exclusive.
4211OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4212 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4214 bool IsInclusive, ScanInfo *ScanRedInfo) {
// On the first pass, lazily create the buffer-pointer allocas/buffers.
4215 if (ScanRedInfo->OMPFirstScanLoop) {
4216 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4217 ScanVarsType, ScanRedInfo);
4221 if (!updateToLocation(
Loc))
// Input pass: store each private scan variable into buffer[IV].
4226 if (ScanRedInfo->OMPFirstScanLoop) {
4228 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4229 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4230 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4231 Type *DestTy = ScanVarsType[i];
4232 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4233 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4235 Builder.CreateStore(Src, Val);
4238 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4239 emitBlock(ScanRedInfo->OMPScanDispatch,
4240 Builder.GetInsertBlock()->getParent());
// Scan pass: load the (already prefix-combined) value from buffer[IV] back
// into the private scan variable.
4242 if (!ScanRedInfo->OMPFirstScanLoop) {
4243 IV = ScanRedInfo->IV;
4246 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4247 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4248 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4249 Type *DestTy = ScanVarsType[i];
4251 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4252 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4253 Builder.CreateStore(Src, ScanVars[i]);
// Inclusive vs. exclusive decides which side of the scan directive runs in
// which pass; CmpI is computed in lines missing from this chunk.
4259 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4260 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4261 ScanRedInfo->OMPAfterScanBlock);
4263 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4264 ScanRedInfo->OMPBeforeScanBlock);
4266 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4267 Builder.GetInsertBlock()->getParent());
4268 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4269 return Builder.saveIP();
// Allocate the per-scan-variable temporary buffers used by the two-pass scan
// lowering: one pointer alloca per scan variable (at AllocaIP), then — inside
// a masked region — a heap buffer of Span+1 elements per variable, stored
// through the corresponding alloca.
4272Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
// Allocas must live in the dedicated alloca block.
4276 Builder.restoreIP(AllocaIP);
4278 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4280 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4281 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
// Body of the masked region: malloc one buffer per scan variable.
4285 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4286 InsertPointTy CodeGenIP) ->
Error {
4287 Builder.restoreIP(CodeGenIP);
// Buffers hold Span+1 elements (one extra slot for the final/total value).
4289 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4290 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4294 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4295 AllocSpan,
nullptr,
"arr");
4296 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
// Allocation runs once, before the loops, in the scan-init block.
4304 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
// Only one thread allocates (masked), then all threads synchronize so the
// buffers are visible before the input pass starts.
4306 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4307 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4310 return AfterIP.takeError();
4311 Builder.restoreIP(*AfterIP);
4312 BasicBlock *InputBB = Builder.GetInsertBlock();
4314 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4315 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4317 return AfterIP.takeError();
4318 Builder.restoreIP(*AfterIP);
// Finalization of the scan lowering: copy the final (total) reduction value
// from buffer[Span] back into the original reduction variable and free each
// buffer. Runs in a masked region followed by a barrier, mirroring the
// allocation in emitScanBasedDirectiveDeclsIR.
4323Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4325 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4326 InsertPointTy CodeGenIP) ->
Error {
4327 Builder.restoreIP(CodeGenIP);
4328 for (ReductionInfo RedInfo : ReductionInfos) {
4329 Value *PrivateVar = RedInfo.PrivateVariable;
4330 Value *OrigVar = RedInfo.Variable;
4331 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4332 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
// Slot at index Span holds the final combined value.
4334 Type *SrcTy = RedInfo.ElementType;
4335 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4337 Value *Src = Builder.CreateLoad(SrcTy, Val);
4339 Builder.CreateStore(Src, OrigVar);
// Release the heap buffer allocated in the decls phase.
4340 Builder.CreateFree(Buff);
// Insert at/before the terminator of the scan-finish block if present.
4348 if (ScanRedInfo->OMPScanFinish->getTerminator())
4349 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator())
4351 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
4354 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4355 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4358 return AfterIP.takeError();
4359 Builder.restoreIP(*AfterIP);
4360 BasicBlock *InputBB = Builder.GetInsertBlock();
4362 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
// Barrier so every thread sees the restored originals before continuing.
4363 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4365 return AfterIP.takeError();
4366 Builder.restoreIP(*AfterIP);
// Combine the per-iteration scan buffers into prefix sums. Structure: an
// outer loop over ~ceil(log2(Span)) rounds; in round k an inner loop combines
// buffer[i] with buffer[i - 2^k] via the user reduction, i.e. the classic
// logarithmic parallel-prefix sweep. The whole computation runs inside a
// masked region followed by a barrier, then the finals are written back.
// NOTE(review): many declarations (LoopBB, InnerLoopBB, CurFn, NMin1, ...)
// are on lines missing from this chunk.
4370OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4371 const LocationDescription &
Loc,
4373 ScanInfo *ScanRedInfo) {
4375 if (!updateToLocation(
Loc))
4377 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4378 InsertPointTy CodeGenIP) ->
Error {
4379 Builder.restoreIP(CodeGenIP);
4385 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
// Number of rounds = ceil(log2(Span)), computed in floating point.
4387 Builder.GetInsertBlock()->getModule(),
4391 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4394 Builder.GetInsertBlock()->getModule(),
4397 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4400 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4401 Builder.SetInsertPoint(InputBB);
4402 Builder.CreateBr(LoopBB);
// Outer loop: Counter counts rounds, Pow2K is the 2^k stride for the round.
4403 emitBlock(LoopBB, CurFn);
4404 Builder.SetInsertPoint(LoopBB);
4406 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4408 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4409 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4411 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
// Skip the inner loop when the stride already exceeds the highest index.
4419 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4420 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4421 emitBlock(InnerLoopBB, CurFn);
4422 Builder.SetInsertPoint(InnerLoopBB);
4423 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
// Inner loop body: buff[IV] = reduce(buff[IV], buff[IV - Pow2K]).
4425 for (ReductionInfo RedInfo : ReductionInfos) {
4426 Value *ReductionVal = RedInfo.PrivateVariable;
4427 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4428 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4429 Type *DestTy = RedInfo.ElementType;
4430 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4432 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4433 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4435 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4436 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4437 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
// User-supplied reduction combiner produces Result from LHS/RHS.
4439 InsertPointOrErrorTy AfterIP =
4440 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4442 return AfterIP.takeError();
4443 Builder.CreateStore(Result, LHSPtr);
// Inner loop decrements toward Pow2K (walks indices downward).
4446 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4447 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4448 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4449 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4450 emitBlock(InnerExitBB, CurFn);
// Next round: Counter += 1, Pow2K <<= 1 (nuw).
4452 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
4455 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4456 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4458 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
// Run the sweep under `masked`, then a barrier, then write back finals.
4468 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4469 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4472 return AfterIP.takeError();
4473 Builder.restoreIP(*AfterIP);
4474 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4477 return AfterIP.takeError();
4478 Builder.restoreIP(*AfterIP);
4479 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
// Drive the two-pass scan lowering: run the caller-provided input-loop
// generator with OMPFirstScanLoop set, then the scan-loop generator with the
// flag cleared. Error propagation lines are missing from this chunk.
4486Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4489 ScanInfo *ScanRedInfo) {
// Pass 1: "input" loop — populates the scan buffers.
4497 ScanRedInfo->OMPFirstScanLoop =
true;
4498 Error Err = InputLoopGen();
// Pass 2: "scan" loop — consumes the prefix-combined buffers.
4508 ScanRedInfo->OMPFirstScanLoop =
false;
4509 Error Err = ScanLoopGen(Builder.saveIP());
// Create the four control-flow blocks used by the scan lowering (dispatch,
// after-scan, before-scan, loop-exit) in the current function. The
// BasicBlock::Create calls on the right-hand sides are on lines missing from
// this chunk.
4516void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4517 Function *
Fun = Builder.GetInsertBlock()->getParent();
4518 ScanRedInfo->OMPScanDispatch =
4520 ScanRedInfo->OMPAfterScanBlock =
4522 ScanRedInfo->OMPBeforeScanBlock =
4524 ScanRedInfo->OMPScanLoopExit =
// Materialize the block structure of a canonical loop:
//   preheader -> header(iv phi) -> cond(iv <u tripcount) -> body -> latch
//   (iv+1, back to header) ; cond-false -> exit -> after
// and record the blocks in a fresh CanonicalLoopInfo. Block creation lines
// are missing from this chunk; only the wiring is visible.
4527CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4551 Builder.SetCurrentDebugLocation(
DL);
4553 Builder.SetInsertPoint(Preheader);
4554 Builder.CreateBr(Header);
// Header: induction variable PHI, starting at 0 from the preheader.
4556 Builder.SetInsertPoint(Header);
4557 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4558 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4559 Builder.CreateBr(
Cond);
// Cond: unsigned compare against the trip count selects body vs. exit.
4561 Builder.SetInsertPoint(
Cond);
4563 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4564 Builder.CreateCondBr(Cmp, Body, Exit);
4566 Builder.SetInsertPoint(Body);
4567 Builder.CreateBr(Latch);
// Latch: iv.next = iv + 1 (nuw), back-edge to the header.
4569 Builder.SetInsertPoint(Latch);
4570 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4571 "omp_" + Name +
".next",
true);
4572 Builder.CreateBr(Header);
4575 Builder.SetInsertPoint(Exit);
4576 Builder.CreateBr(After);
// CanonicalLoopInfo objects are owned by the builder's LoopInfos list.
4579 LoopInfos.emplace_front();
4580 CanonicalLoopInfo *CL = &LoopInfos.front();
4582 CL->Header = Header;
// Create a canonical loop with a precomputed trip count: build the skeleton,
// splice it into the current position (if the location is valid), and invoke
// the body callback at the loop's body insert point with the induction
// variable.
4594OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4595 LoopBodyGenCallbackTy BodyGenCB,
4600 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4601 NextBB, NextBB, Name);
// Only connect the skeleton to the current code if we have a usable location.
4605 if (updateToLocation(
Loc)) {
4609 spliceBB(Builder, After,
false);
4610 Builder.CreateBr(CL->getPreheader());
// User callback emits the loop body; errors propagate to the caller.
4615 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
// Fragment of a separate helper (its signature is on lines missing from this
// chunk): ScanInfo objects are owned by the builder's ScanInfos list.
4625 ScanInfos.emplace_front();
4626 ScanInfo *
Result = &ScanInfos.front();
// Build the two canonical loops required for an `inscan` reduction: compute
// the shared trip count (Span), create the scan-init block, then generate the
// same loop twice via emitScanBasedDirectiveIR — once as the input pass and
// once as the scan pass. The BodyGen wrapper re-routes each iteration through
// the scan dispatch/before/after blocks created by createScanBBs.
4631OpenMPIRBuilder::createCanonicalScanLoops(
4632 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4633 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4634 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
// Trip count may be computed at a dedicated insert point if one is given.
4635 LocationDescription ComputeLoc =
4636 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4637 updateToLocation(ComputeLoc);
4641 Value *TripCount = calculateCanonicalLoopTripCount(
4642 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
// Both passes iterate Span times; buffers are sized from this.
4643 ScanRedInfo->Span = TripCount;
4644 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4645 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
// Per-iteration wrapper: create the scan blocks, divert the body through the
// dispatch block, then run the user body starting in the before-scan block.
4647 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4648 Builder.restoreIP(CodeGenIP);
4649 ScanRedInfo->IV =
IV;
4650 createScanBBs(ScanRedInfo);
4651 BasicBlock *InputBlock = Builder.GetInsertBlock();
4655 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4656 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4657 Builder.GetInsertBlock()->getParent());
4658 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4659 emitBlock(ScanRedInfo->OMPScanLoopExit,
4660 Builder.GetInsertBlock()->getParent());
4661 Builder.CreateBr(ContinueBlock);
4662 Builder.SetInsertPoint(
4663 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4664 return BodyGenCB(Builder.saveIP(),
IV);
// First pass ("input" loop) generator.
4667 const auto &&InputLoopGen = [&]() ->
Error {
4669 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4670 ComputeIP, Name,
true, ScanRedInfo);
4674 Builder.restoreIP((*LoopInfo)->getAfterIP());
// Second pass ("scan" loop) generator; records where the lowering finishes.
4677 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4679 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4680 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4684 Builder.restoreIP((*LoopInfo)->getAfterIP());
4685 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4688 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
// Compute the trip count of a Start/Stop/Step loop as an unsigned value of
// the induction-variable type. Signed loops normalize by |Step| and swap the
// bounds when Step is negative; unsigned loops subtract directly. The
// InclusiveStop flag adds the extra iteration for `<=`-style loops. A final
// select clamps to zero when the loop does not execute.
4694Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4696 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
// All three bound values must share the induction-variable type.
4706 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4707 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4709 updateToLocation(
Loc);
// Signed case: use |Step| and order the bounds so Span is non-negative.
4726 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4727 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4728 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4729 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4730 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
4731 ZeroCmp = Builder.CreateICmp(
// Unsigned case: plain subtraction (nuw).
4734 Span = Builder.CreateSub(Stop, Start,
"",
true);
4735 ZeroCmp = Builder.CreateICmp(
4739 Value *CountIfLooping;
4740 if (InclusiveStop) {
4741 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
// Exclusive stop: ceil-divide, avoiding overflow when Span == 0.
4744 Value *CountIfTwo = Builder.CreateAdd(
4745 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4747 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
4750 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4751 "omp_" + Name +
".tripcount");
// Bounds-based createCanonicalLoop overload (its first signature line is
// missing from this chunk): computes the trip count from Start/Stop/Step,
// then delegates to the trip-count overload with a BodyGen wrapper that maps
// the logical IV back to the user value IndVar = Start + IV * Step.
const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4756 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4757 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4758 ScanInfo *ScanRedInfo) {
4759 LocationDescription ComputeLoc =
4760 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4762 Value *TripCount = calculateCanonicalLoopTripCount(
4763 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
// Map the normalized 0..TripCount IV to the user's iteration value.
4765 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4766 Builder.restoreIP(CodeGenIP);
4767 Value *Span = Builder.CreateMul(
IV, Step);
4768 Value *IndVar = Builder.CreateAdd(Span, Start);
// In scan mode, remember the user-visible IV for the scan passes.
4770 ScanRedInfo->IV = IndVar;
4771 return BodyGenCB(Builder.saveIP(), IndVar);
4773 LocationDescription LoopLoc =
4776 : LocationDescription(Builder.saveIP(),
4777 Builder.getCurrentDebugLocation());
4778 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
// Select the __kmpc_dist_for_static_init_{4u,8u} runtime function matching
// the induction variable's bit width (function header is on lines missing
// from this chunk).
4787 OpenMPIRBuilder &OMPBuilder) {
4788 unsigned Bitwidth = Ty->getIntegerBitWidth();
4790 return OMPBuilder.getOrCreateRuntimeFunction(
4791 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4793 return OMPBuilder.getOrCreateRuntimeFunction(
4794 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
// Select the __kmpc_for_static_init_{4u,8u} runtime function matching the
// induction variable's bit width (function header is on lines missing from
// this chunk).
4803 OpenMPIRBuilder &OMPBuilder) {
4804 unsigned Bitwidth = Ty->getIntegerBitWidth();
4806 return OMPBuilder.getOrCreateRuntimeFunction(
4807 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4809 return OMPBuilder.getOrCreateRuntimeFunction(
4810 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
// Lower a canonical loop to a statically-scheduled worksharing loop:
// allocate the lb/ub/stride/lastiter out-params, call
// __kmpc_for_static_init (or the dist variant), shrink the loop to the
// thread's [lb, ub] chunk by rewriting the trip count and offsetting the IV,
// then emit __kmpc_for_static_fini and an optional barrier.
4814OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4815 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4818 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4820 "Require dedicated allocate IP");
4823 Builder.restoreIP(CLI->getPreheaderIP());
4824 Builder.SetCurrentDebugLocation(
DL);
4827 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4828 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4832 Type *IVTy =
IV->getType();
// Pick the init runtime function by loop type (distribute-for vs. plain).
4834 LoopType == WorksharingLoopType::DistributeForStaticLoop
4838 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
// Out-parameters for the init call live in the dedicated alloca block.
4841 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4844 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4845 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
4846 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
4847 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
4848 CLI->setLastIter(PLastIter);
// Seed lb=0, ub=tripcount-1 (inclusive), stride=1 before the init call.
4854 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4856 Constant *One = ConstantInt::get(IVTy, 1);
4857 Builder.CreateStore(Zero, PLowerBound);
4858 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4859 Builder.CreateStore(UpperBound, PUpperBound);
4860 Builder.CreateStore(One, PStride);
4862 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4865 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4866 ? OMPScheduleType::OrderedDistribute
4869 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
// Helper so the init call can be issued twice (for dist_schedule cases).
4873 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
4874 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
4875 this](
Value *SchedulingType,
auto &Builder) {
4877 PLowerBound, PUpperBound});
// The distribute-for variant takes an extra dist-upper-bound out-param.
4878 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4879 Value *PDistUpperBound =
4880 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4881 Args.push_back(PDistUpperBound);
4884 createRuntimeFunctionCall(StaticInit, Args);
4886 BuildInitCall(SchedulingType, Builder);
4887 if (HasDistSchedule &&
4888 LoopType != WorksharingLoopType::DistributeStaticLoop) {
4889 Constant *DistScheduleSchedType = ConstantInt::get(
4894 BuildInitCall(DistScheduleSchedType, Builder);
// The runtime returned this thread's chunk: retarget the loop to it.
4896 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
4897 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
4898 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
4899 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
4900 CLI->setTripCount(TripCount);
// Shift the body's IV by the chunk lower bound.
4907 Builder.SetInsertPoint(CLI->getBody(),
4908 CLI->getBody()->getFirstInsertionPt());
4909 Builder.SetCurrentDebugLocation(
DL);
4910 return Builder.CreateAdd(OldIV, LowerBound);
// Close the worksharing region in the exit block.
4914 Builder.SetInsertPoint(CLI->getExit(),
4915 CLI->getExit()->getTerminator()->getIterator());
4916 createRuntimeFunctionCall(StaticFini, {SrcLoc, ThreadNum});
4920 InsertPointOrErrorTy BarrierIP =
4921 createBarrier(LocationDescription(Builder.saveIP(),
DL),
4922 omp::Directive::OMPD_for,
false,
4925 return BarrierIP.takeError();
4928 InsertPointTy AfterIP = CLI->getAfterIP();
// Fragments of other helpers follow (reachability collection and
// llvm.loop.parallel_accesses metadata creation); their enclosing functions
// are on lines missing from this chunk.
4950 if (
Block == CLI->getCond() ||
Block == CLI->getHeader())
4952 Reachable.insert(
Block);
4962 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
// Lower a canonical loop with a static-chunked schedule: widen bookkeeping to
// a 32/64-bit internal IV type, call __kmpc_for_static_init once (twice when
// a dist_schedule is also present), then wrap the original loop in an outer
// "dispatch" canonical loop that strides over chunks; the inner loop's trip
// count becomes the current chunk's size and its IV is offset by the chunk
// start. Ends with __kmpc_for_static_fini and an optional barrier.
4965OpenMPIRBuilder::InsertPointOrErrorTy
4966OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
4967 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4970 assert(CLI->isValid() &&
"Requires a valid canonical loop");
// NOTE(review): `A || B && "msg"` parses as `A || (B && "msg")`; since the
// string literal is always truthy this is behaviorally `A || B`, but the
// intent `(A || B) && "msg"` should be parenthesized (-Wparentheses).
4971 assert(ChunkSize || DistScheduleChunkSize &&
"Chunk size is required");
4973 LLVMContext &Ctx = CLI->getFunction()->getContext();
4975 Value *OrigTripCount = CLI->getTripCount();
4976 Type *IVTy =
IV->getType();
4978 "Max supported tripcount bitwidth is 64 bits");
// Internal bookkeeping runs in i32 or i64 depending on the IV width.
4980 :
Type::getInt64Ty(Ctx);
4983 Constant *One = ConstantInt::get(InternalIVTy, 1);
4993 if (ChunkSize || DistScheduleChunkSize)
5001 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
// Out-params for the init call, in the dedicated alloca block.
5004 Builder.restoreIP(AllocaIP);
5005 Builder.SetCurrentDebugLocation(
DL);
5006 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5007 Value *PLowerBound =
5008 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5009 Value *PUpperBound =
5010 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5011 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5012 CLI->setLastIter(PLastIter);
5015 Builder.restoreIP(CLI->getPreheaderIP());
5016 Builder.SetCurrentDebugLocation(
DL);
// Widen/narrow chunk sizes and trip count to the internal IV type.
5019 Value *CastedChunkSize = Builder.CreateZExtOrTrunc(
5020 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5021 Value *CastedDistScheduleChunkSize = Builder.CreateZExtOrTrunc(
5022 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5023 "distschedulechunksize");
5024 Value *CastedTripCount =
5025 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5028 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5030 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
// Seed lb=0, ub=tripcount-1 (inclusive), stride=1.
5031 Builder.CreateStore(Zero, PLowerBound);
5032 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
5033 Builder.CreateStore(OrigUpperBound, PUpperBound);
5034 Builder.CreateStore(One, PStride);
5039 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5040 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5041 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
// Helper so the init call can be issued once per schedule kind.
5042 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5043 PUpperBound, PStride, One,
5044 this](
Value *SchedulingType,
Value *ChunkSize,
5046 createRuntimeFunctionCall(
5047 StaticInit, {SrcLoc, ThreadNum,
5048 SchedulingType, PLastIter,
5049 PLowerBound, PUpperBound,
5053 BuildInitCall(SchedulingType, CastedChunkSize, Builder);
// Second init call when combining `for` schedule with dist_schedule.
5054 if (DistScheduleSchedType != OMPScheduleType::None &&
5055 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5056 SchedType != OMPScheduleType::OrderedDistribute) {
5060 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize, Builder);
// First chunk bounds returned by the runtime; range and stride drive the
// dispatch loop over subsequent chunks.
5064 Value *FirstChunkStart =
5065 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5066 Value *FirstChunkStop =
5067 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5068 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
5070 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5071 Value *NextChunkStride =
5072 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
// Outer dispatch loop: one iteration per chunk assigned to this thread.
5075 BasicBlock *DispatchEnter = splitBB(Builder,
true);
5076 Value *DispatchCounter;
5081 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
5082 {Builder.saveIP(),
DL},
5083 [&](InsertPointTy BodyIP,
Value *Counter) {
5084 DispatchCounter = Counter;
5087 FirstChunkStart, CastedTripCount, NextChunkStride,
// The dispatch CLI is dismantled; only its blocks are reused below.
5093 BasicBlock *DispatchBody = DispatchCLI->getBody();
5094 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
5095 BasicBlock *DispatchExit = DispatchCLI->getExit();
5096 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
5097 DispatchCLI->invalidate();
5105 Builder.restoreIP(CLI->getPreheaderIP());
5106 Builder.SetCurrentDebugLocation(
DL);
// Inner trip count = min(chunk range, remaining iterations), truncated back
// to the original IV type.
5109 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
5110 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
5111 Value *IsLastChunk =
5112 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
5113 Value *CountUntilOrigTripCount =
5114 Builder.CreateSub(CastedTripCount, DispatchCounter);
5115 Value *ChunkTripCount = Builder.CreateSelect(
5116 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
5117 Value *BackcastedChunkTC =
5118 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
5119 CLI->setTripCount(BackcastedChunkTC);
// Offset the inner IV by the chunk start.
5124 Value *BackcastedDispatchCounter =
5125 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
5127 Builder.restoreIP(CLI->getBodyIP());
5128 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
// Close the region and optionally synchronize.
5133 createRuntimeFunctionCall(StaticFini, {SrcLoc, ThreadNum});
5137 InsertPointOrErrorTy AfterIP =
5138 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
5141 return AfterIP.takeError();
// Select the device-runtime static-loop entry point by worksharing-loop type
// and induction-variable bit width (function header and the 32/64 guards are
// on lines missing from this chunk).
5159 unsigned Bitwidth = Ty->getIntegerBitWidth();
5160 Module &M = OMPBuilder->M;
5162 case WorksharingLoopType::ForStaticLoop:
5164 return OMPBuilder->getOrCreateRuntimeFunction(
5165 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5167 return OMPBuilder->getOrCreateRuntimeFunction(
5168 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5170 case WorksharingLoopType::DistributeStaticLoop:
5172 return OMPBuilder->getOrCreateRuntimeFunction(
5173 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5175 return OMPBuilder->getOrCreateRuntimeFunction(
5176 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5178 case WorksharingLoopType::DistributeForStaticLoop:
5180 return OMPBuilder->getOrCreateRuntimeFunction(
5181 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5183 return OMPBuilder->getOrCreateRuntimeFunction(
5184 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
// Unsupported bit widths fall through to an error path (not visible here).
5187 if (Bitwidth != 32 && Bitwidth != 64) {
// Build the argument list and emit the call to the device-runtime static-loop
// function (helper header is on lines missing from this chunk). Distribute
// loops get trailing zero thread-count/flag args; `for`-style loops append
// the team thread count from omp_get_num_threads plus trailing args
// (including the NoLoop flag for distribute-for).
5199 Function &LoopBodyFn,
bool NoLoop) {
5201 Module &M = OMPBuilder->M;
// Distribute-only loops: no per-team thread count is passed.
5210 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5211 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5212 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5213 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5214 OMPBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
// Other loop types query the number of threads at runtime.
5217 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5218 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5219 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5220 Value *NumThreads = OMPBuilder->createRuntimeFunctionCall(RTLNumThreads, {});
5223 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5224 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
// Distribute-for additionally passes a zero and the NoLoop flag.
5225 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5226 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5227 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5229 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5232 OMPBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
// Post-outlining callback for device worksharing loops (helper header is on
// lines missing from this chunk): splice the loop body out of the CFG into
// the preheader, delete the now-dead skeleton blocks, locate the call to the
// outlined body function, replace it with the runtime static-loop call, and
// erase the instructions queued for deletion.
5236 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5241 Value *TripCount = CLI->getTripCount();
// Move the body's instructions (minus its terminator) into the preheader.
5247 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5248 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5253 Builder.restoreIP({Preheader, Preheader->
end()});
5256 Builder.CreateBr(CLI->getExit());
// Collect the dead loop-skeleton blocks via an OutlineInfo region walk.
5259 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5262 CleanUpInfo.EntryBB = CLI->getHeader();
5263 CleanUpInfo.ExitBB = CLI->getExit();
5264 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
// The outlined body function must have exactly one (call) user, located in
// the loop preheader.
5272 "Expected unique undroppable user of outlined function");
5274 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5276 "Expected outlined function call to be located in loop preheader");
// Forward the captured-variables aggregate argument if present.
5278 if (OutlinedFnCallInstruction->
arg_size() > 1)
5285 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5287 for (
auto &ToBeDeletedItem : ToBeDeleted)
5288 ToBeDeletedItem->eraseFromParent();
// Device-side worksharing lowering: outline the loop body region into a
// separate function (driven by OutlineInfo), replacing the induction variable
// inside the region with a load of a fresh alloca so it becomes an argument;
// the PostOutlineCB later swaps the outlined call for the runtime loop call.
5292OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
5293 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5296 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5297 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5300 OI.OuterAllocaBB = CLI->getPreheader();
5306 OI.OuterAllocaBB = AllocaIP.getBlock();
// The outlined region spans the loop body up to a fresh pre-latch block.
5309 OI.EntryBB = CLI->getBody();
5310 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5311 "omp.prelatch",
true);
5314 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
// Stand-in for the IV: an alloca + load the extractor can treat as an input.
5318 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5320 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5331 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5342 CLI->getPreheader(),
5351 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
// Rewrite in-region uses of the IV to the stand-in load.
5357 CLI->getIndVar()->user_end());
5360 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5361 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
// The stand-in must not end up inside the captured-vars aggregate.
5367 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
// Deferred: after outlining, replace the call with the runtime loop call.
5374 OI.PostOutlineCB = [=, ToBeDeletedVec =
5375 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5379 addOutlineInfo(std::move(OI));
5380 return CLI->getAfterIP();
// applyWorkshareLoop: top-level dispatcher for lowering a workshare loop.
// On target devices it delegates to applyWorkshareLoopTarget; otherwise it
// computes an effective schedule type and switches on the base schedule to
// pick the static / static-chunked / dynamic lowering.  dist_schedule
// support selects Ordered(Distribute|DistributeChunked) when present.
// NOTE(review): extraction with elided interior lines — fragments only.
5383OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
 5384    DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
 5385    bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
 5386    bool HasSimdModifier,
bool HasMonotonicModifier,
 5387    bool HasNonmonotonicModifier,
bool HasOrderedClause,
 5389    Value *DistScheduleChunkSize) {
 5390  if (Config.isTargetDevice())
 5391    return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
 5393      SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
 5394      HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
 5396  bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
 5397                   OMPScheduleType::ModifierOrdered;
 5399  if (HasDistSchedule) {
 5400    DistScheduleSchedType = DistScheduleChunkSize
 5401                                ? OMPScheduleType::OrderedDistributeChunked
 5402                                : OMPScheduleType::OrderedDistribute;
// Strip modifier bits so the switch sees only the base schedule kind.
 5404  switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
 5405  case OMPScheduleType::BaseStatic:
 5406  case OMPScheduleType::BaseDistribute:
// NOTE(review): '&&' binds tighter than '||', so this parses as
// !ChunkSize || (!DistScheduleChunkSize && "msg").  Because the string
// literal is truthy the checked condition is still
// !ChunkSize || !DistScheduleChunkSize, but parenthesizing
// (... ) && "msg" would match the usual assert idiom and silence
// -Wparentheses — confirm intent upstream before changing.
 5407    assert(!ChunkSize || !DistScheduleChunkSize &&
 5408           "No chunk size with static-chunked schedule");
 5409    if (IsOrdered && !HasDistSchedule)
 5410      return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
 5411                                       NeedsBarrier, ChunkSize);
 5413    if (DistScheduleChunkSize)
 5414      return applyStaticChunkedWorkshareLoop(
 5415          DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
 5416          DistScheduleChunkSize, DistScheduleSchedType);
 5417    return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
 5420  case OMPScheduleType::BaseStaticChunked:
 5421  case OMPScheduleType::BaseDistributeChunked:
 5422    if (IsOrdered && !HasDistSchedule)
 5423      return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
 5424                                       NeedsBarrier, ChunkSize);
 5426    return applyStaticChunkedWorkshareLoop(
 5427        DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
 5428        DistScheduleChunkSize, DistScheduleSchedType);
// Schedules below take no user chunk size (assert message fragment at 5438).
 5430  case OMPScheduleType::BaseRuntime:
 5431  case OMPScheduleType::BaseAuto:
 5432  case OMPScheduleType::BaseGreedy:
 5433  case OMPScheduleType::BaseBalanced:
 5434  case OMPScheduleType::BaseSteal:
 5435  case OMPScheduleType::BaseGuidedSimd:
 5436  case OMPScheduleType::BaseRuntimeSimd:
 5438           "schedule type does not support user-defined chunk sizes");
 5440  case OMPScheduleType::BaseDynamicChunked:
 5441  case OMPScheduleType::BaseGuidedChunked:
 5442  case OMPScheduleType::BaseGuidedIterativeChunked:
 5443  case OMPScheduleType::BaseGuidedAnalyticalChunked:
 5444  case OMPScheduleType::BaseStaticBalancedChunked:
 5445    return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
 5446                                     NeedsBarrier, ChunkSize);
// --- Fragments of three static helpers selecting the bitwidth-specific
// __kmpc_dispatch_{init,next,fini} runtime functions (4u vs 8u variants)
// based on the induction-variable integer width.  Signatures and the
// 32-vs-64 branch conditions are elided by the extraction.
 5459  unsigned Bitwidth = Ty->getIntegerBitWidth();
 5461    return OMPBuilder.getOrCreateRuntimeFunction(
 5462        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
 5464    return OMPBuilder.getOrCreateRuntimeFunction(
 5465        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
 5475  unsigned Bitwidth = Ty->getIntegerBitWidth();
 5477    return OMPBuilder.getOrCreateRuntimeFunction(
 5478        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
 5480    return OMPBuilder.getOrCreateRuntimeFunction(
 5481        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
 5490  unsigned Bitwidth = Ty->getIntegerBitWidth();
 5492    return OMPBuilder.getOrCreateRuntimeFunction(
 5493        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
 5495    return OMPBuilder.getOrCreateRuntimeFunction(
 5496        M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
// applyDynamicWorkshareLoop: lower a canonical loop using the dynamic
// dispatch runtime protocol — allocate p.lastiter/p.lowerbound/
// p.upperbound/p.stride, call __kmpc_dispatch_init, loop while
// __kmpc_dispatch_next reports more work (rewiring the CFG so the loop
// re-enters through an outer-cond block), call __kmpc_dispatch_fini per
// chunk when the schedule is ordered, and emit a closing barrier when
// NeedsBarrier.  NOTE(review): extraction with elided lines — fragments.
5500OpenMPIRBuilder::InsertPointOrErrorTy
5501OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
 5502                                           InsertPointTy AllocaIP,
 5504                                           bool NeedsBarrier,
Value *Chunk) {
 5505  assert(CLI->isValid() &&
"Requires a valid canonical loop");
 5507         "Require dedicated allocate IP");
 5509         "Require valid schedule type");
 5511  bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
 5512                 OMPScheduleType::ModifierOrdered;
 5515  Builder.SetCurrentDebugLocation(
DL);
 5518  Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
 5519  Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 5523  Type *IVTy =
IV->getType();
// Runtime out-parameters for dispatch_next, allocated at the dedicated
// alloca insertion point.
 5528  Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
 5530  Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
 5531  Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
 5532  Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
 5533  Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
 5534  CLI->setLastIter(PLastIter);
// The dispatch protocol here is one-based: lb=1, ub=tripcount, stride=1.
 5542  Constant *One = ConstantInt::get(IVTy, 1);
 5543  Builder.CreateStore(One, PLowerBound);
 5544  Value *UpperBound = CLI->getTripCount();
 5545  Builder.CreateStore(UpperBound, PUpperBound);
 5546  Builder.CreateStore(One, PStride);
 5552  InsertPointTy AfterIP = CLI->getAfterIP();
 5560  Value *ThreadNum = getOrCreateThreadID(SrcLoc);
 5563      ConstantInt::get(I32Type,
static_cast<int>(SchedType));
 5566  createRuntimeFunctionCall(DynamicInit, {SrcLoc, ThreadNum, SchedulingType,
// Outer-cond block: ask the runtime for the next chunk and branch to the
// loop header while work remains; lb is rebased by -1 for the zero-based IV.
 5575  Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
 5576  Value *Res = createRuntimeFunctionCall(
 5578      {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
 5579  Constant *Zero32 = ConstantInt::get(I32Type, 0);
 5582      Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
 5583  Builder.CreateCondBr(MoreWork, Header, Exit);
// Rewire the header PHI and the preheader branch through the outer-cond.
 5589  PI->setIncomingBlock(0, OuterCond);
 5590  PI->setIncomingValue(0, LowerBound);
 5595  Br->setSuccessor(0, OuterCond);
// The loop condition now compares against the runtime-provided upper bound.
 5600  Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
 5601  UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
 5604  CI->setOperand(1, UpperBound);
 5608  assert(BI->getSuccessor(1) == Exit);
 5609  BI->setSuccessor(1, OuterCond);
// Ordered schedules signal per-chunk completion via dispatch_fini.
 5613    Builder.SetInsertPoint(&Latch->
back());
 5615    createRuntimeFunctionCall(DynamicFini, {SrcLoc, ThreadNum});
 5620    Builder.SetInsertPoint(&
Exit->back());
 5621    InsertPointOrErrorTy BarrierIP =
 5622        createBarrier(LocationDescription(Builder.saveIP(),
DL),
 5623                      omp::Directive::OMPD_for,
false,
 5626      return BarrierIP.takeError();
// --- Fragments of a helper that erases now-unreachable blocks: repeatedly
// drops blocks from the erase set while they still have external users.
 5645  auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
 5650      if (BBsToErase.
count(UseInst->getParent()))
 5657  while (BBsToErase.
remove_if(HasRemainingUses)) {
// collapseLoops: fuse a perfect loop nest into a single canonical loop.
// Computes the collapsed trip count as the NUW product of all original trip
// counts, creates a new loop skeleton, derives each original induction
// variable from the collapsed IV via urem/udiv peeling (innermost first),
// and rechains the original body/latch/after blocks with ContinueWith.
// NOTE(review): extraction with elided lines — the signature opens before
// this fragment; only the trailing parameter is visible.
 5667                                InsertPointTy ComputeIP) {
 5668  assert(
Loops.size() >= 1 &&
"At least one loop required");
 5669  size_t NumLoops =
Loops.size();
// Nothing to do for a single loop.
 5673    return Loops.front();
 5675  CanonicalLoopInfo *Outermost =
Loops.front();
 5676  CanonicalLoopInfo *Innermost =
Loops.back();
 5677  BasicBlock *OrigPreheader = Outermost->getPreheader();
 5678  BasicBlock *OrigAfter = Outermost->getAfter();
 5685    Loop->collectControlBlocks(OldControlBBs);
 5688  Builder.SetCurrentDebugLocation(
DL);
// Trip-count computation goes to ComputeIP if the caller provided one,
// otherwise to the outermost preheader.
 5689  if (ComputeIP.isSet())
 5690    Builder.restoreIP(ComputeIP);
 5692    Builder.restoreIP(Outermost->getPreheaderIP());
 5696  Value *CollapsedTripCount =
nullptr;
 5697  for (CanonicalLoopInfo *L :
Loops) {
 5699           "All loops to collapse must be valid canonical loops");
 5700    Value *OrigTripCount =
L->getTripCount();
 5701    if (!CollapsedTripCount) {
 5702      CollapsedTripCount = OrigTripCount;
// NUW: the product of trip counts is assumed not to wrap.
 5707    CollapsedTripCount =
 5708        Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
 5712  CanonicalLoopInfo *
Result =
 5713      createLoopSkeleton(
DL, CollapsedTripCount,
F,
 5714                         OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
// Recover each original IV: innermost gets urem by its trip count, then the
// quotient feeds the next-outer loop; index 0 keeps the final quotient.
 5720  Builder.restoreIP(
Result->getBodyIP());
 5724  NewIndVars.
resize(NumLoops);
 5725  for (
int i = NumLoops - 1; i >= 1; --i) {
 5726    Value *OrigTripCount =
Loops[i]->getTripCount();
 5728    Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
 5729    NewIndVars[i] = NewIndVar;
 5731    Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
 5734  NewIndVars[0] = Leftover;
// ContinueWith stitches the retained body blocks into the new single loop.
 5745  auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
 5752    ContinueBlock =
nullptr;
 5753    ContinuePred = NextSrc;
 5760  for (
size_t i = 0; i < NumLoops - 1; ++i)
 5761    ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
 5764  ContinueWith(Innermost->getBody(), Innermost->getLatch());
 5767  for (
size_t i = NumLoops - 1; i > 0; --i)
 5768    ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
 5771  ContinueWith(
Result->getLatch(),
nullptr);
// Replace all IV uses, then the original CLIs are invalidated (5784 loop).
 5778  for (
size_t i = 0; i < NumLoops; ++i)
 5779    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
 5784  for (CanonicalLoopInfo *L :
Loops)
// tileLoops: tile a perfect loop nest with the given tile sizes, producing
// 2*NumLoops new loops — NumLoops "floor" loops over whole tiles plus a
// possible partial tile, and NumLoops "tile" loops iterating within a tile.
// Floor trip count = ceil(orig/size) computed as udiv + (urem != 0); each
// tile loop's trip count is selected per-iteration (full size vs remainder
// on the epilogue tile); original IVs are reconstructed as
// floorIV*size + tileIV (NUW).  Returns the new loops outermost-first.
// NOTE(review): extraction with elided lines — fragments only.
5793std::vector<CanonicalLoopInfo *>
 5797         "Must pass as many tile sizes as there are loops");
 5798  int NumLoops =
Loops.size();
 5799  assert(NumLoops >= 1 &&
"At least one loop to tile required");
 5801  CanonicalLoopInfo *OutermostLoop =
Loops.front();
 5802  CanonicalLoopInfo *InnermostLoop =
Loops.back();
 5803  Function *
F = OutermostLoop->getBody()->getParent();
 5804  BasicBlock *InnerEnter = InnermostLoop->getBody();
 5805  BasicBlock *InnerLatch = InnermostLoop->getLatch();
 5811    Loop->collectControlBlocks(OldControlBBs);
 5818  for (CanonicalLoopInfo *L :
Loops) {
 5819    assert(
L->isValid() &&
"All input loops must be valid canonical loops");
// Collect the inter-loop "inbetween" code of the original nest (5831 loop).
 5831  for (
int i = 0; i < NumLoops - 1; ++i) {
 5832    CanonicalLoopInfo *Surrounding =
Loops[i];
 5835    BasicBlock *EnterBB = Surrounding->getBody();
// Emit floor trip counts in the outermost preheader.
 5841  Builder.SetCurrentDebugLocation(
DL);
 5842  Builder.restoreIP(OutermostLoop->getPreheaderIP());
 5844  for (
int i = 0; i < NumLoops; ++i) {
 5846    Value *OrigTripCount = OrigTripCounts[i];
 5849    Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
 5850    Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
// Ceiling division: add 1 iff there is a remainder (zext of the compare).
 5859    Value *FloorTripOverflow =
 5860        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
 5862    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
 5863    Value *FloorTripCount =
 5864        Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
 5865                          "omp_floor" +
Twine(i) +
".tripcount",
true);
 5868    FloorCompleteCount.
push_back(FloorCompleteTripCount);
 5874  std::vector<CanonicalLoopInfo *>
Result;
 5875  Result.reserve(NumLoops * 2);
 5879  BasicBlock *Enter = OutermostLoop->getPreheader();
 5886  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
// EmbeddNewLoop nests a fresh loop skeleton inside the current Enter/
// Continue frontier, advancing the frontier to the new loop's body/latch.
 5888  auto EmbeddNewLoop =
 5889      [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
 5891        CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
 5892            DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
 5897        Enter = EmbeddedLoop->getBody();
 5898        Continue = EmbeddedLoop->getLatch();
 5899        OutroInsertBefore = EmbeddedLoop->getLatch();
 5900        return EmbeddedLoop;
 5904                            const Twine &NameBase) {
 5906      CanonicalLoopInfo *EmbeddedLoop =
 5907          EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
 5908      Result.push_back(EmbeddedLoop);
 5912  EmbeddNewLoops(FloorCount,
"floor");
// Per-iteration tile trip count: remainder on the last (epilogue) floor
// iteration, full tile size otherwise.
 5916  Builder.SetInsertPoint(Enter->getTerminator());
 5918  for (
int i = 0; i < NumLoops; ++i) {
 5919    CanonicalLoopInfo *FloorLoop =
Result[i];
 5922    Value *FloorIsEpilogue =
 5923        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
 5924    Value *TileTripCount =
 5925        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
 5931  EmbeddNewLoops(TileCounts,
"tile");
// Re-insert the original inter-loop code between the new loops.
 5936  for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
 5945    BodyEnter =
nullptr;
 5946    BodyEntered = ExitBB;
// Reconstruct each original IV as floorIV * tilesize + tileIV (NUW).
 5958  Builder.restoreIP(
Result.back()->getBodyIP());
 5959  for (
int i = 0; i < NumLoops; ++i) {
 5960    CanonicalLoopInfo *FloorLoop =
Result[i];
 5961    CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
 5962    Value *OrigIndVar = OrigIndVars[i];
 5966        Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
 5968        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
// Invalidate the old CLIs; sanity-check the generated ones.
 5975  for (CanonicalLoopInfo *L :
Loops)
 5979  for (CanonicalLoopInfo *GenL : Result)
// --- Fragments: loop-metadata attachment helper (no-op on empty property
// list), an access-group tagger that marks memory instructions in the loop
// with llvm.access.group metadata, and the trivial unrollLoopFull /
// unrollLoopHeuristic entry points.  NOTE(review): extraction with elided
// lines throughout this span — fragments only.
 5990  if (Properties.
empty())
 6013  assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
 6017  assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
// Only instructions that may touch memory need the access-group tag.
 6025      if (
I.mayReadOrWriteMemory()) {
 6029        I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
// unrollLoopFull / unrollLoopHeuristic: attach unroll metadata; the actual
// transformation is left to the optimizer (bodies elided here).
6034void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
6041void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
// createIfVersion: clone the loop body into an "if.else" version guarded by
// IfCond — the "then" path keeps the (e.g. simd) optimized loop, the "else"
// path executes the cloned, unversioned blocks.
6049void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
 6052                                      const Twine &NamePrefix) {
 6053  Function *
F = CanonicalLoop->getFunction();
 6075  auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
 6081      C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
 6083      C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
 6086  Builder.SetInsertPoint(SplitBeforeIt);
 6088  Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
 6091  spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
 6094  Builder.SetInsertPoint(ElseBlock);
// Snapshot the loop's block list before cloning so new blocks are skipped.
 6100  ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
 6102  ExistingBlocks.
append(
L->block_begin(),
L->block_end());
 6108  assert(LoopCond && LoopHeader &&
"Invalid loop structure");
// Preheader/latch are shared, not cloned (condition fragment).
 6110    if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
 6117    if (
Block == ThenBlock)
 6118      NewBB->
setName(NamePrefix +
".if.else");
 6121    VMap[
Block] = NewBB;
 6125  Builder.CreateBr(NewBlocks.
front());
 6129      L->getLoopLatch()->splitBasicBlock(
 6130          L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
 6134  L->addBasicBlockToLoop(ThenBlock, LI);
// getOpenMPDefaultSimdAlign: default simd alignment by target — x86 keys
// off avx512f/avx features; PPC and Wasm have fixed answers (values elided).
6138OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
 6140  if (TargetTriple.
isX86()) {
 6141    if (Features.
lookup(
"avx512f"))
 6143    else if (Features.
lookup(
"avx"))
 6147  if (TargetTriple.
isPPC())
 6149  if (TargetTriple.
isWasm())
// applySimd: annotate a canonical loop for vectorization — emit alignment
// assumptions for the aligned clause, optionally version the loop under
// IfCond via createIfVersion, collect the body blocks reachable between
// header and cond, and attach llvm.loop.vectorize metadata (width from
// simdlen/safelen).  NOTE(review): extraction with elided lines — fragments.
6154void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
 6156                                Value *IfCond, OrderKind Order,
 6160  Function *
F = CanonicalLoop->getFunction();
// aligned clause: one llvm.assume per (pointer, alignment) pair.
 6175  if (AlignedVars.
size()) {
 6176    InsertPointTy IP = Builder.saveIP();
 6177    for (
auto &AlignedItem : AlignedVars) {
 6178      Value *AlignedPtr = AlignedItem.first;
 6179      Value *Alignment = AlignedItem.second;
 6182      Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
 6185    Builder.restoreIP(IP);
 6190    createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
// Body blocks are those reachable excluding the cond/header pair.
 6200    if (
Block == CanonicalLoop->getCond() ||
 6201        Block == CanonicalLoop->getHeader())
 6203    Reachable.insert(
Block);
// No safelen or 'order(concurrent)': iterations may run fully in parallel.
 6213  if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
 6229        Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
 6231  if (Simdlen || Safelen) {
// simdlen takes precedence over safelen as the vectorize width.
 6235    ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
// --- createTargetMachine (static): build a TargetMachine for heuristic
// queries from the function's target-cpu/target-features attributes.
6261static std::unique_ptr<TargetMachine>
 6265  StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
 6266  StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
 6277                                          std::nullopt, OptLevel));
// --- computeHeuristicUnrollFactor fragments: register TTI in the analysis
// manager, run the unroll cost model, and derive a factor from UP.Count.
 6301      [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
 6302  FAM.registerPass([&]() {
return TIRA; });
 6316  assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
 6321                                nullptr, ORE,
static_cast<int>(OptLevel),
 6342             <<
" Threshold=" << UP.
Threshold <<
"\n"
 6345             <<
" PartialOptSizeThreshold="
// Heuristic: count loads/stores whose pointer is an entry-block alloca.
 6365      Ptr = Load->getPointerOperand();
 6367      Ptr = Store->getPointerOperand();
 6374    if (Alloca->getParent() == &
F->getEntryBlock())
 6394  int MaxTripCount = 0;
 6395  bool MaxOrZero =
false;
 6396  unsigned TripMultiple = 0;
 6398  bool UseUpperBound =
false;
 6400                     MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
 6402  unsigned Factor = UP.
Count;
 6403  LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
// unrollLoopPartial: partially unroll by Factor.  Factor==0 → heuristic;
// Factor==1 → no-op (loop returned unchanged); Factor>=2 → tile by Factor
// and mark the inner loop with llvm.loop.unroll.count metadata.
6411void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
 6413                                        CanonicalLoopInfo **UnrolledCLI) {
 6414  assert(Factor >= 0 &&
"Unroll factor must not be negative");
 6430        Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
 6443    *UnrolledCLI =
Loop;
 6448         "unrolling only makes sense with a factor of 2 or larger");
 6450  Type *IndVarTy =
Loop->getIndVarType();
// Implemented via tileLoops: outer "floor" loop + inner tile loop of size
// Factor; the inner loop gets the unroll metadata.
 6457  std::vector<CanonicalLoopInfo *>
LoopNest =
 6458      tileLoops(
DL, {
Loop}, {FactorVal});
 6461  CanonicalLoopInfo *InnerLoop =
LoopNest[1];
 6472           Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
 6475    (*UnrolledCLI)->assertOK();
// createCopyPrivate: emit a __kmpc_copyprivate call broadcasting a single
// thread's private data (CpyBuf via CpyFn) gated on DidIt.
// NOTE(review): extraction with elided lines — fragments throughout.
6479OpenMPIRBuilder::InsertPointTy
6480OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
 6483  if (!updateToLocation(
Loc))
 6487  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6488  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6489  Value *ThreadId = getOrCreateThreadID(Ident);
 6491  llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
 6493  Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
 6495  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
 6496  createRuntimeFunctionCall(Fn, Args);
 6498  return Builder.saveIP();
// createSingle: emit a __kmpc_single / __kmpc_end_single inlined region.
// With copyprivate vars: DidIt flag records which thread executed the
// region, then one createCopyPrivate per var after the region; otherwise a
// trailing barrier unless nowait.
6501OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
 6502    const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
 6506  if (!updateToLocation(
Loc))
 6512  if (!CPVars.
empty()) {
 6514    Builder.CreateStore(Builder.getInt32(0), DidIt);
 6517  Directive OMPD = Directive::OMPD_single;
 6519  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6520  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6521  Value *ThreadId = getOrCreateThreadID(Ident);
 6524  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
 6525  Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
 6527  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
 6528  Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
// Finalization wrapper also marks DidIt=1 inside the executing thread.
 6530  auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
 6531    if (
Error Err = FiniCB(IP))
 6538      Builder.CreateStore(Builder.getInt32(1), DidIt);
 6551  InsertPointOrErrorTy AfterIP =
 6552      EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
 6556    return AfterIP.takeError();
 6559    for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
 6561      createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
 6562                        ConstantInt::get(Int64, 0), CPVars[
I],
// No copyprivate: an implicit barrier closes 'single' unless nowait.
 6565  }
else if (!IsNowait) {
 6566    InsertPointOrErrorTy AfterIP =
 6567        createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
 6568                      omp::Directive::OMPD_unknown,
false,
 6571      return AfterIP.takeError();
 6573  return Builder.saveIP();
// createCritical: emit __kmpc_critical(_with_hint)/__kmpc_end_critical
// around the body, keyed by a named region lock.
6576OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
 6577    const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
 6578    FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
 6580  if (!updateToLocation(
Loc))
 6583  Directive OMPD = Directive::OMPD_critical;
 6585  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6586  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6587  Value *ThreadId = getOrCreateThreadID(Ident);
 6588  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
 6589  Value *
Args[] = {Ident, ThreadId, LockVar};
// hint clause selects the _with_hint runtime entry with an extra argument.
 6595    EnterArgs.push_back(HintInst);
 6596    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
 6598    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
 6600  Instruction *EntryCall = createRuntimeFunctionCall(RTFn, EnterArgs);
 6603      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
 6604  Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
 6606  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// createOrderedDepend: lower ordered depend(source/sink) — store the loop
// iteration vector into an i64 array alloca and call
// __kmpc_doacross_post (source) or __kmpc_doacross_wait (sink).
// NOTE(review): extraction with elided lines — fragments throughout.
6610OpenMPIRBuilder::InsertPointTy
6611OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
 6612                                     InsertPointTy AllocaIP,
unsigned NumLoops,
 6614                                     const Twine &Name,
bool IsDependSource) {
 6618         "OpenMP runtime requires depend vec with i64 type");
 6620  if (!updateToLocation(
Loc))
// Vector storage is allocated at the dedicated alloca IP, stores at Loc.
 6625  Builder.restoreIP(AllocaIP);
 6626  AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
 6628  updateToLocation(
Loc);
 6631  for (
unsigned I = 0;
I < NumLoops; ++
I) {
 6632    Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
 6633        ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
 6634    StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
 6638  Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
 6639      ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
 6642  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6643  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6644  Value *ThreadId = getOrCreateThreadID(Ident);
 6645  Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
 6649    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
 6651    RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
 6652  createRuntimeFunctionCall(RTLFn, Args);
 6654  return Builder.saveIP();
// createOrderedThreadsSimd: 'ordered' region; the threads variant wraps the
// body in __kmpc_ordered/__kmpc_end_ordered, simd emits no runtime calls.
6657OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
 6658    const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
 6659    FinalizeCallbackTy FiniCB,
bool IsThreads) {
 6660  if (!updateToLocation(
Loc))
 6663  Directive OMPD = Directive::OMPD_ordered;
 6669    Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6670    Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6671    Value *ThreadId = getOrCreateThreadID(Ident);
 6674    Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
 6675    EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
 6678        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
 6679    ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
 6682  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// EmitOMPInlinedRegion: common machinery for inlined directive regions —
// push the finalization frame, emit the (possibly conditional) entry, run
// the body callback, then emit finalization + exit call and rejoin the CFG.
6686OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
 6688    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
 6689    bool HasFinalize,
bool IsCancellable) {
 6692    FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
 6696  BasicBlock *EntryBB = Builder.GetInsertBlock();
 6705      emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
 6708  if (
Error Err = BodyGenCB( InsertPointTy(),
 6716         "Unexpected control flow graph state!!");
 6717  InsertPointOrErrorTy AfterIP =
 6718      emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
 6720    return AfterIP.takeError();
 6722         "Unexpected Control Flow State!");
 6728           "Unexpected Insertion point location!");
// If exit merged with its predecessor, continue emitting there instead.
 6731  auto InsertBB = merged ? ExitPredBB : ExitBB;
 6734  Builder.SetInsertPoint(InsertBB);
 6736  return Builder.saveIP();
// emitCommonDirectiveEntry: for conditional regions, branch on the entry
// call's non-null result into the body (ThenBB) vs the exit block; the
// original terminator is re-inserted before a placeholder unreachable.
// NOTE(review): extraction with elided lines — fragments throughout.
6739OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
 6742  if (!Conditional || !EntryCall)
 6743    return Builder.saveIP();
 6745  BasicBlock *EntryBB = Builder.GetInsertBlock();
 6746  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
 6758  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
 6760  Builder.SetInsertPoint(UI);
 6761  Builder.Insert(EntryBBTI);
 6762  UI->eraseFromParent();
// emitCommonDirectiveExit: pop the finalization frame, run the user FiniCB,
// then insert the runtime exit call (when HasFinalize / ExitCall apply).
6769OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
 6770    omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
 6773  Builder.restoreIP(FinIP);
 6777    assert(!FinalizationStack.empty() &&
 6778           "Unexpected finalization stack state!");
 6780    FinalizationInfo Fi = FinalizationStack.pop_back_val();
 6781    assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
 6783    if (
Error Err = Fi.FiniCB(FinIP))
 6790      Builder.SetInsertPoint(FiniBBTI);
 6794    return Builder.saveIP();
 6798  Builder.Insert(ExitCall);
// createCopyinClauseBlocks: guard threadprivate copy-in so only non-master
// threads (master/private pointers differ) run the copy region, then merge
// at copyin.not.master.end.
6804OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
 6805    InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
 6834                               "copyin.not.master.end");
 6841  Builder.SetInsertPoint(OMP_Entry);
 6842  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
 6843  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
 6844  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
 6845  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
 6847  Builder.SetInsertPoint(CopyBegin);
 6849  Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
 6851  return Builder.saveIP();
// Thin runtime-call wrappers.  Each builds (ident, thread-id, ...) args at
// Loc and emits one call.  NOTE(review): extraction with elided lines.
// createOMPAlloc → __kmpc_alloc.
6854CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
 6858  updateToLocation(
Loc);
 6861  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6862  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6863  Value *ThreadId = getOrCreateThreadID(Ident);
 6866  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
 6868  return createRuntimeFunctionCall(Fn, Args, Name);
// createOMPFree → __kmpc_free.
6871CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
 6875  updateToLocation(
Loc);
 6878  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6879  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6880  Value *ThreadId = getOrCreateThreadID(Ident);
 6882  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
 6883  return createRuntimeFunctionCall(Fn, Args, Name);
// createOMPInteropInit → __tgt_interop_init; Device and NumDependences get
// defaults when null (NumDependences → 0; Device default elided).
6886CallInst *OpenMPIRBuilder::createOMPInteropInit(
 6887    const LocationDescription &
Loc,
Value *InteropVar,
 6889    Value *DependenceAddress,
bool HaveNowaitClause) {
 6891  updateToLocation(
Loc);
 6894  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6895  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6896  Value *ThreadId = getOrCreateThreadID(Ident);
 6897  if (Device ==
nullptr)
 6899  Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
 6900  if (NumDependences ==
nullptr) {
 6901    NumDependences = ConstantInt::get(Int32, 0);
 6905  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
 6907      Ident, ThreadId, InteropVar, InteropTypeVal,
 6908      Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
 6910  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
 6912  return createRuntimeFunctionCall(Fn, Args);
// createOMPInteropDestroy → __tgt_interop_destroy; same defaulting pattern.
6915CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
 6916    const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
 6917    Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
 6919  updateToLocation(
Loc);
 6922  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6923  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6924  Value *ThreadId = getOrCreateThreadID(Ident);
 6925  if (Device ==
nullptr)
 6927  if (NumDependences ==
nullptr) {
 6928    NumDependences = ConstantInt::get(Int32, 0);
 6932  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
 6934      Ident, ThreadId, InteropVar,
Device,
 6935      NumDependences, DependenceAddress, HaveNowaitClauseVal};
 6937  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
 6939  return createRuntimeFunctionCall(Fn, Args);
// createOMPInteropUse → __tgt_interop_use; same defaulting pattern.
6942CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
 6944                                               Value *NumDependences,
 6945                                               Value *DependenceAddress,
 6946                                               bool HaveNowaitClause) {
 6948  updateToLocation(
Loc);
 6950  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6951  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6952  Value *ThreadId = getOrCreateThreadID(Ident);
 6953  if (Device ==
nullptr)
 6955  if (NumDependences ==
nullptr) {
 6956    NumDependences = ConstantInt::get(Int32, 0);
 6960  Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
 6962      Ident, ThreadId, InteropVar,
Device,
 6963      NumDependences, DependenceAddress, HaveNowaitClauseVal};
 6965  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
 6967  return createRuntimeFunctionCall(Fn, Args);
// createCachedThreadPrivate → __kmpc_threadprivate_cached with an internal
// cache variable named per the threadprivate symbol.
6970CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
 6974  updateToLocation(
Loc);
 6977  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 6978  Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
 6979  Value *ThreadId = getOrCreateThreadID(Ident);
 6981      getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
 6985      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
 6987  return createRuntimeFunctionCall(Fn, Args);
// createTargetInit: emit the device-side kernel prologue — build the
// dynamic/kernel environment globals, call __kmpc_target_init, and branch
// between user code and the worker-exit path based on the returned thread
// kind.  Handles the "_debug__"-prefixed kernel-wrapper naming scheme and
// writes thread-bound attributes onto the kernel.
// NOTE(review): extraction with elided lines — fragments throughout.
6990OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
 6991    const LocationDescription &
Loc,
 6992    const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
 6994         "expected num_threads and num_teams to be specified");
 6996  if (!updateToLocation(
Loc))
 7000  Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
 7001  Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Debug builds wrap the kernel; strip the prefix to find the real kernel.
 7013  const std::string DebugPrefix =
"_debug__";
 7014  if (KernelName.
ends_with(DebugPrefix)) {
 7015    KernelName = KernelName.
drop_back(DebugPrefix.length());
 7016    Kernel = M.getFunction(KernelName);
 7022      if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
 7027  int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
 7028  if (MaxThreadsVal < 0)
 7029    MaxThreadsVal = std::max(
 7032  if (MaxThreadsVal > 0)
 7033    writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
 7044  Function *Fn = getOrCreateRuntimeFunctionPtr(
 7045      omp::RuntimeFunction::OMPRTL___kmpc_target_init);
// Per-kernel dynamic environment global (<kernel>_dynamic_environment).
 7048  Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
 7049  Constant *DynamicEnvironmentInitializer =
 7053      DynamicEnvironmentInitializer, DynamicEnvironmentName,
 7055      DL.getDefaultGlobalsAddressSpace());
// Address-space-cast the global only if its type differs from the expected
// pointer type.
 7059      DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
 7060          ? DynamicEnvironmentGV
 7062                                                DynamicEnvironmentPtr);
 7065      ConfigurationEnvironment, {
 7066                                    UseGenericStateMachineVal,
 7067                                    MayUseNestedParallelismVal,
 7074                                    ReductionBufferLength,
 7077      KernelEnvironment, {
 7078                             ConfigurationEnvironmentInitializer,
 7082  std::string KernelEnvironmentName =
 7083      (KernelName +
"_kernel_environment").str();
 7086      KernelEnvironmentInitializer, KernelEnvironmentName,
 7088      DL.getDefaultGlobalsAddressSpace());
 7092      KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
 7093          ? KernelEnvironmentGV
 7095                                                KernelEnvironmentPtr);
// The launch environment arrives as argument 0 of the (wrapper) kernel.
 7096  Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
 7098  KernelLaunchEnvironment =
 7099      KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
 7100          ? KernelLaunchEnvironment
 7101          : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
 7102                                        KernelLaunchEnvParamTy);
 7103  CallInst *ThreadKind = createRuntimeFunctionCall(
 7104      Fn, {KernelEnvironment, KernelLaunchEnvironment});
 7106  Value *ExecUserCode = Builder.CreateICmpEQ(
// Placeholder unreachable marks where user code resumes; worker threads
// return void from the worker-exit block.
 7116  auto *UI = Builder.CreateUnreachable();
 7122  Builder.SetInsertPoint(WorkerExitBB);
 7123  Builder.CreateRetVoid();
 7126  Builder.SetInsertPoint(CheckBBTI);
 7127  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
 7130  UI->eraseFromParent();
7137void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
7138 int32_t TeamsReductionDataSize,
7139 int32_t TeamsReductionBufferLength) {
7140 if (!updateToLocation(
Loc))
7143 Function *Fn = getOrCreateRuntimeFunctionPtr(
7144 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7146 createRuntimeFunctionCall(Fn, {});
7148 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
7154 const std::string DebugPrefix =
"_debug__";
7156 KernelName = KernelName.
drop_back(DebugPrefix.length());
7157 auto *KernelEnvironmentGV =
7158 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
7159 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
7160 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
7162 KernelEnvironmentInitializer,
7163 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
7165 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
7172 if (
Kernel.hasFnAttribute(Name)) {
7173 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
7179std::pair<int32_t, int32_t>
7181 int32_t ThreadLimit =
7182 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
7185 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
7186 if (!Attr.isValid() || !Attr.isStringAttribute())
7187 return {0, ThreadLimit};
7188 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
7191 return {0, ThreadLimit};
7192 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
7198 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
7199 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
7200 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
7202 return {0, ThreadLimit};
// Persist the thread upper bound on the kernel as function attributes:
// "omp_target_thread_limit" unconditionally, plus the AMDGPU
// "amdgpu-flat-work-group-size" string on the AMDGPU path (the triple
// dispatch is elided by the extraction — verify against upstream).
7205void OpenMPIRBuilder::writeThreadBoundsForKernel(
 const Triple &
T,
7208 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
7211 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
// Read {LB, UB} team bounds: LB is 0 and UB comes from the parsed
// "omp_target_num_teams" attribute on the kernel.
7219std::pair<int32_t, int32_t>
7222 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7226 int32_t LB, int32_t UB) {
7233 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
// Attach target-side attributes/calling conventions to an outlined target
// region function. On the device side the per-target branches visible here
// select behavior for NVPTX and SPIR-V (AMDGPU branch and the bodies are
// elided by the extraction); the second device-side section asserts the
// outlined function exists when code is embedded.
7236void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7238 if (Config.isTargetDevice()) {
7245 else if (
T.isNVPTX())
7247 else if (
T.isSPIRV())
7254 if (Config.isTargetDevice()) {
7255 assert(OutlinedFn &&
"The outlined function must exist if embedded");
// Produce the address constant used as the offload entry for a target
// region. Asserts that no global with the kernel entry name already exists
// (lookup with AllowInternal = true) before creating it.
7264Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7269 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7270 "Named kernel already exists?");
// Generate (or skip) the outlined function for a target region and register
// it with the offload-entry machinery.
//  - The entry function name is derived from EntryInfo.
//  - The generation callback runs on the device, or on the host when
//    offload is not mandatory; otherwise OutlinedFn stays null.
//  - If this is not an offload entry we return early; otherwise the region
//    is registered and OutlinedFnID is produced (device: the entry name
//    itself; host: a platform-specific "<name>.region_id" symbol).
7276Error OpenMPIRBuilder::emitTargetRegionFunction(
7277 TargetRegionEntryInfo &EntryInfo,
7278 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7282 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7284 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7288 OutlinedFn = *CBResult;
7290 OutlinedFn =
nullptr;
7296 if (!IsOffloadEntry)
7299 std::string EntryFnIDName =
7300 Config.isTargetDevice()
7301 ? std::string(EntryFnName)
7302 : createPlatformSpecificName({EntryFnName,
"region_id"});
7304 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7305 EntryFnName, EntryFnIDName);
// Register an outlined target region: set its target attributes, create its
// function ID and entry address constants, record the entry with the
// offload info manager, and return the ID for use by the launch code.
7309Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7310 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7313 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7314 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7315 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7316 OffloadInfoManager.registerTargetRegionEntryInfo(
7317 EntryInfo, EntryAddr, OutlinedFnID,
7318 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7319 return OutlinedFnID;
// Emit a "target data" construct (or a standalone enter/exit/update when
// BodyGenCB is null). Structure:
//  - On the device, only the body is generated (no mapping runtime calls).
//  - BeginThenGen builds the offloading arrays and calls the begin mapper
//    (standalone: the construct's own mapper function, possibly via an
//    outer target task when nowait is requested), then loads use_device_ptr
//    replacements and generates the privatized body.
//  - EndThenGen rebuilds the end-call arguments and calls
//    __tgt_target_data_end_mapper.
//  - IfCond selects between the then/else generators via emitIfClause.
// NOTE(review): the embedded numbering has many gaps here (e.g.
// 7344->7347, 7378->7381, 7490->7498), so error-check lines and the
// standalone-vs-region dispatch are partially elided — verify against
// upstream before editing.
7322OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7323 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7324 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7325 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7327 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7328 BodyGenTy BodyGenType)>
7331 if (!updateToLocation(
Loc))
7332 return InsertPointTy();
7334 Builder.restoreIP(CodeGenIP);
// Device compilation: emit only the body, no host mapping runtime.
7336 if (Config.IsTargetDevice.value_or(
false)) {
7338 InsertPointOrErrorTy AfterIP =
7339 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7341 return AfterIP.takeError();
7342 Builder.restoreIP(*AfterIP);
7344 return Builder.saveIP();
// A null body callback means a standalone enter/exit/update directive.
7347 bool IsStandAlone = !BodyGenCB;
7348 MapInfosTy *MapInfo;
// Generates the "begin" half: offloading arrays + begin-mapper call.
7352 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7353 InsertPointTy CodeGenIP) ->
Error {
7354 MapInfo = &GenMapInfoCB(Builder.saveIP());
7355 if (
Error Err = emitOffloadingArrays(
7356 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7357 true, DeviceAddrCB))
7360 TargetDataRTArgs RTArgs;
7361 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7364 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7369 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7370 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Argument bundle shared by the begin/standalone mapper calls.
7374 SrcLocInfo, DeviceID,
7375 PointerNum, RTArgs.BasePointersArray,
7376 RTArgs.PointersArray, RTArgs.SizesArray,
7377 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7378 RTArgs.MappersArray};
7381 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
// Standalone + nowait: wrap the mapper call in continuation blocks.
7385 if (
Info.HasNoWait) {
7392 createRuntimeFunctionCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7395 if (
Info.HasNoWait) {
7399 emitBlock(OffloadContBlock, CurFn,
true);
7400 Builder.restoreIP(Builder.saveIP());
// nowait requires an outer target task; otherwise run the body inline.
7405 bool RequiresOuterTargetTask =
Info.HasNoWait;
7406 if (!RequiresOuterTargetTask)
7407 cantFail(TaskBodyCB(
nullptr,
nullptr,
7410 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7411 {}, RTArgs,
Info.HasNoWait));
7413 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7414 omp::OMPRTL___tgt_target_data_begin_mapper);
7416 createRuntimeFunctionCall(BeginMapperFunc, OffloadingArgs);
// Materialize use_device_ptr values: load the mapped device pointer and
// store it into the replacement slot.
7418 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7421 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7422 Builder.CreateStore(LI, DeviceMap.second.second);
7429 InsertPointOrErrorTy AfterIP =
7430 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7432 return AfterIP.takeError();
7433 Builder.restoreIP(*AfterIP);
// "else" of the if clause for the begin half: body without privatization.
7441 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7442 InsertPointTy CodeGenIP) ->
Error {
7443 InsertPointOrErrorTy AfterIP =
7444 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7446 return AfterIP.takeError();
7447 Builder.restoreIP(*AfterIP);
// Generates the "end" half: rebuild args (ForEndCall) and call end mapper.
7452 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7453 TargetDataRTArgs RTArgs;
7454 Info.EmitDebug = !MapInfo->Names.empty();
7455 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7458 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7463 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7464 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7467 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7468 PointerNum, RTArgs.BasePointersArray,
7469 RTArgs.PointersArray, RTArgs.SizesArray,
7470 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7471 RTArgs.MappersArray};
7473 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7475 createRuntimeFunctionCall(EndMapperFunc, OffloadingArgs);
7481 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
// Begin half: honor the if clause when present.
7489 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7490 return BeginThenGen(AllocaIP, Builder.saveIP());
7498 InsertPointOrErrorTy AfterIP =
7499 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7501 return AfterIP.takeError();
// End half: same if-clause handling for the end-mapper call.
7505 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7506 return EndThenGen(AllocaIP, Builder.saveIP());
// Standalone path: only the begin generator runs.
7509 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7510 return BeginThenGen(AllocaIP, Builder.saveIP());
7516 return Builder.saveIP();
// Select the __kmpc static-init runtime entry matching the induction
// variable: {32,64} x {signed,unsigned} x {for, GPU distribute}. Only 32-
// and 64-bit IVs are supported by the OpenMP runtime, as asserted.
7520OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7521 bool IsGPUDistribute) {
7522 assert((IVSize == 32 || IVSize == 64) &&
7523 "IV size is not compatible with the omp runtime");
7525 if (IsGPUDistribute)
7527 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7528 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7529 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7530 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7532 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7533 : omp::OMPRTL___kmpc_for_static_init_4u)
7534 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7535 : omp::OMPRTL___kmpc_for_static_init_8u);
7537 return getOrCreateRuntimeFunction(M, Name);
// Select __kmpc_dispatch_init_{4,4u,8,8u} by IV size and signedness.
// Only 32/64-bit IVs are supported, as asserted.
7540FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
7542 assert((IVSize == 32 || IVSize == 64) &&
7543 "IV size is not compatible with the omp runtime");
7545 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7546 : omp::OMPRTL___kmpc_dispatch_init_4u)
7547 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7548 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7550 return getOrCreateRuntimeFunction(M, Name);
// Select __kmpc_dispatch_next_{4,4u,8,8u} by IV size and signedness.
// Only 32/64-bit IVs are supported, as asserted.
7553FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
7555 assert((IVSize == 32 || IVSize == 64) &&
7556 "IV size is not compatible with the omp runtime");
7558 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7559 : omp::OMPRTL___kmpc_dispatch_next_4u)
7560 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7561 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7563 return getOrCreateRuntimeFunction(M, Name);
// Select __kmpc_dispatch_fini_{4,4u,8,8u} by IV size and signedness.
// Only 32/64-bit IVs are supported, as asserted.
7566FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
7568 assert((IVSize == 32 || IVSize == 64) &&
7569 "IV size is not compatible with the omp runtime");
7571 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7572 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7573 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7574 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7576 return getOrCreateRuntimeFunction(M, Name);
7580 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7585 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7593 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7597 if (NewVar && (arg == NewVar->
getArg()))
7607 auto UpdateDebugRecord = [&](
auto *DR) {
7610 for (
auto Loc : DR->location_ops()) {
7611 auto Iter = ValueReplacementMap.find(
Loc);
7612 if (Iter != ValueReplacementMap.end()) {
7613 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7614 ArgNo = std::get<1>(Iter->second) + 1;
7618 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7625 "Unexpected debug intrinsic");
7627 UpdateDebugRecord(&DVR);
7630 if (OMPBuilder.Config.isTargetDevice()) {
7632 Module *M = Func->getParent();
7635 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7637 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7638 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7640 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7653 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7655 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7656 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7658 if (OMPBuilder.Config.isTargetDevice()) {
7666 for (
auto &Arg : Inputs)
7671 for (
auto &Arg : Inputs)
7675 auto BB = Builder.GetInsertBlock();
7687 if (TargetCpuAttr.isStringAttribute())
7688 Func->addFnAttr(TargetCpuAttr);
7690 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7691 if (TargetFeaturesAttr.isStringAttribute())
7692 Func->addFnAttr(TargetFeaturesAttr);
7694 if (OMPBuilder.Config.isTargetDevice()) {
7696 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7697 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7708 Builder.SetInsertPoint(EntryBB);
7711 if (OMPBuilder.Config.isTargetDevice())
7712 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7714 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7719 if (OMPBuilder.Config.isTargetDevice())
7720 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7724 splitBB(Builder,
true,
"outlined.body");
7725 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7727 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7729 return AfterIP.takeError();
7730 Builder.restoreIP(*AfterIP);
7731 if (OMPBuilder.Config.isTargetDevice())
7732 OMPBuilder.createTargetDeinit(Builder);
7735 Builder.CreateRetVoid();
7739 auto AllocaIP = Builder.saveIP();
7744 const auto &ArgRange =
7745 OMPBuilder.Config.isTargetDevice()
7746 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7779 if (Instr->getFunction() == Func)
7780 Instr->replaceUsesOfWith(
Input, InputCopy);
7786 for (
auto InArg :
zip(Inputs, ArgRange)) {
7788 Argument &Arg = std::get<1>(InArg);
7789 Value *InputCopy =
nullptr;
7791 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7792 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7794 return AfterIP.takeError();
7795 Builder.restoreIP(*AfterIP);
7796 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7816 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7823 ReplaceValue(
Input, InputCopy, Func);
7827 for (
auto Deferred : DeferredReplacement)
7828 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7831 ValueReplacementMap);
7839 Value *TaskWithPrivates,
7840 Type *TaskWithPrivatesTy) {
7842 Type *TaskTy = OMPIRBuilder.Task;
7845 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7846 Value *Shareds = TaskT;
7856 if (TaskWithPrivatesTy != TaskTy)
7857 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7874 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7879 assert((!NumOffloadingArrays || PrivatesTy) &&
7880 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7883 Module &M = OMPBuilder.M;
7907 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
7913 Type *TaskPtrTy = OMPBuilder.TaskPtr;
7914 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
7920 ".omp_target_task_proxy_func",
7921 Builder.GetInsertBlock()->getModule());
7922 Value *ThreadId = ProxyFn->getArg(0);
7923 Value *TaskWithPrivates = ProxyFn->getArg(1);
7924 ThreadId->
setName(
"thread.id");
7925 TaskWithPrivates->
setName(
"task");
7927 bool HasShareds = SharedArgsOperandNo > 0;
7928 bool HasOffloadingArrays = NumOffloadingArrays > 0;
7931 Builder.SetInsertPoint(EntryBB);
7937 if (HasOffloadingArrays) {
7938 assert(TaskTy != TaskWithPrivatesTy &&
7939 "If there are offloading arrays to pass to the target"
7940 "TaskTy cannot be the same as TaskWithPrivatesTy");
7943 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
7944 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
7946 Builder.CreateStructGEP(PrivatesTy, Privates, i));
7950 auto *ArgStructAlloca =
7952 assert(ArgStructAlloca &&
7953 "Unable to find the alloca instruction corresponding to arguments "
7954 "for extracted function");
7958 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
7960 Value *SharedsSize =
7961 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
7964 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
7966 Builder.CreateMemCpy(
7967 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
7969 KernelLaunchArgs.
push_back(NewArgStructAlloca);
7971 OMPBuilder.createRuntimeFunctionCall(KernelLaunchFunction, KernelLaunchArgs);
7972 Builder.CreateRetVoid();
7978 return GEP->getSourceElementType();
7980 return Alloca->getAllocatedType();
8003 if (OffloadingArraysToPrivatize.
empty())
8004 return OMPIRBuilder.Task;
8007 for (
Value *V : OffloadingArraysToPrivatize) {
8008 assert(V->getType()->isPointerTy() &&
8009 "Expected pointer to array to privatize. Got a non-pointer value "
8012 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8018 "struct.task_with_privates");
8021 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
8022 TargetRegionEntryInfo &EntryInfo,
8023 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8026 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
8027 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
8029 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
8032 EntryFnName, Inputs, CBFunc,
8036 return OMPBuilder.emitTargetRegionFunction(
8037 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
// Wrap a target-region launch (TaskBodyCB) in an OpenMP task so that
// nowait/depend clauses work:
//  - splits off alloca/body blocks, outlines them, and in PostOutlineCB
//    replaces the stale outlined call with __kmpc_omp_task_alloc (or
//    __kmpc_omp_target_task_alloc when a real async target task is
//    needed), a proxy task entry function, shareds copy-in, privatized
//    offloading arrays, and the appropriate task-dispatch runtime call
//    (if0 begin/complete pair, task_with_deps, or plain task).
// NOTE(review): the embedded numbering has large gaps (8045->8169,
// 8188->8202, 8259->8264, ...), so declarations such as OI, ToBeDeleted,
// DepArray and several error checks are elided in this excerpt — verify
// against upstream before editing.
8041OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
8042 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
8043 OpenMPIRBuilder::InsertPointTy AllocaIP,
8045 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
// Carve out dedicated alloca and body blocks for the future task region.
8169 splitBB(Builder,
true,
"target.task.body");
8171 splitBB(Builder,
true,
"target.task.alloca");
8173 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
8174 TargetTaskAllocaBB->
begin());
8175 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
8178 OI.EntryBB = TargetTaskAllocaBB;
8179 OI.OuterAllocaBB = AllocaIP.getBlock();
8184 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8187 Builder.restoreIP(TargetTaskBodyIP);
// Generate the actual target launch inside the task body.
8188 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
8202 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
// A genuine asynchronous target task is only needed for nowait with a
// known device; otherwise the task machinery degenerates to if0/deps.
8206 bool NeedsTargetTask = HasNoWait && DeviceID;
8207 if (NeedsTargetTask) {
// Offloading arrays must be privatized into the task so they outlive
// the enclosing frame; exclude them from the shareds aggregate.
8209 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
8210 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
8211 RTArgs.SizesArray}) {
8213 OffloadingArraysToPrivatize.
push_back(V);
8214 OI.ExcludeArgsFromAggregate.push_back(V);
8218 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8219 DeviceID, OffloadingArraysToPrivatize](
8222 "there must be a single user for the outlined function");
// StaleCI carries: [global.tid][shareds?][privatized arrays...].
8236 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8237 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8239 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8240 "Wrong number of arguments for StaleCI when shareds are present");
8241 int SharedArgOperandNo =
8242 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8248 if (!OffloadingArraysToPrivatize.
empty())
8253 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8254 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8256 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8259 Builder.SetInsertPoint(StaleCI);
8264 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8265 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Plain task alloc vs. device-aware target task alloc.
8274 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8275 : getOrCreateRuntimeFunctionPtr(
8276 OMPRTL___kmpc_omp_target_task_alloc);
8280 Value *ThreadID = getOrCreateThreadID(Ident);
8287 Value *TaskSize = Builder.getInt64(
8288 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8293 Value *SharedsSize = Builder.getInt64(0);
// Recover the shareds struct type from the outliner's alloca.
8295 auto *ArgStructAlloca =
8297 assert(ArgStructAlloca &&
8298 "Unable to find the alloca instruction corresponding to arguments "
8299 "for extracted function");
8300 auto *ArgStructType =
8302 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8303 "arguments for extracted function");
8305 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8314 Value *Flags = Builder.getInt32(0);
8324 TaskSize, SharedsSize,
8327 if (NeedsTargetTask) {
8328 assert(DeviceID &&
"Expected non-empty device ID.");
8332 TaskData = createRuntimeFunctionCall(TaskAllocFn, TaskAllocArgs);
// Copy shareds into the allocated task descriptor.
8338 *
this, Builder, TaskData, TaskWithPrivatesTy);
8339 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
// Copy each offloading array into the task's privates section.
8342 if (!OffloadingArraysToPrivatize.
empty()) {
8344 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8345 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8346 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8353 "ElementType should match ArrayType");
8356 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8357 Builder.CreateMemCpy(
8358 Dst, Alignment, PtrToPrivatize, Alignment,
8359 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
// Synchronous path: wait on dependences, then run the task inline via
// the if0 begin/complete protocol.
8373 if (!NeedsTargetTask) {
8376 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8377 createRuntimeFunctionCall(
8380 Builder.getInt32(Dependencies.size()),
8382 ConstantInt::get(Builder.getInt32Ty(), 0),
8388 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8390 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8391 createRuntimeFunctionCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8392 CallInst *CI = createRuntimeFunctionCall(ProxyFn, {ThreadID, TaskData});
8394 createRuntimeFunctionCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8395 }
else if (DepArray) {
// Asynchronous with dependences.
8400 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8401 createRuntimeFunctionCall(
8403 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8404 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
// Asynchronous, no dependences.
8408 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8409 createRuntimeFunctionCall(TaskFn, {Ident, ThreadID, TaskData});
8414 I->eraseFromParent();
8416 addOutlineInfo(std::move(OI));
8419 << *(Builder.GetInsertBlock()) <<
"\n");
8421 << *(Builder.GetInsertBlock()->getParent()->getParent())
8423 return Builder.saveIP();
// Convenience wrapper: materialize the offloading arrays from CombinedInfo
// and then compute the runtime-call argument views (RTArgs) from them.
// ForEndCall selects the end-call variants of the arguments.
8426Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8427 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8428 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8429 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8432 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8433 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8435 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8441 OpenMPIRBuilder::InsertPointTy AllocaIP,
8442 OpenMPIRBuilder::TargetDataInfo &
Info,
8443 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8444 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8447 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8448 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8450 bool HasNoWait,
Value *DynCGroupMem,
8455 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8456 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8457 Builder.restoreIP(IP);
8458 OMPBuilder.createRuntimeFunctionCall(OutlinedFn, Args);
8459 return Builder.saveIP();
8462 bool HasDependencies = Dependencies.
size() > 0;
8463 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8465 OpenMPIRBuilder::TargetKernelArgs KArgs;
8472 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8480 if (OutlinedFnID && DeviceID)
8481 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8482 EmitTargetCallFallbackCB, KArgs,
8483 DeviceID, RTLoc, TargetTaskAllocaIP);
8491 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8494 OMPBuilder.Builder.restoreIP(AfterIP);
8498 auto &&EmitTargetCallElse =
8499 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8500 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8503 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8504 if (RequiresOuterTargetTask) {
8508 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8509 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8511 Dependencies, EmptyRTArgs, HasNoWait);
8513 return EmitTargetCallFallbackCB(Builder.saveIP());
8516 Builder.restoreIP(AfterIP);
8520 auto &&EmitTargetCallThen =
8521 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8522 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8523 Info.HasNoWait = HasNoWait;
8524 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8525 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8526 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8527 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8534 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8535 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8540 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8542 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8546 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8549 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8557 Value *MaxThreadsClause =
8558 RuntimeAttrs.TeamsThreadLimit.size() == 1
8559 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8562 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8563 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8564 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8565 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8567 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8568 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8570 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8573 unsigned NumTargetItems =
Info.NumberOfPtrs;
8577 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8578 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8581 Value *TripCount = RuntimeAttrs.LoopTripCount
8582 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8583 Builder.getInt64Ty(),
8585 : Builder.getInt64(0);
8589 DynCGroupMem = Builder.getInt32(0);
8591 KArgs = OpenMPIRBuilder::TargetKernelArgs(
8592 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
8593 HasNoWait, DynCGroupMemFallback);
8597 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8600 if (RequiresOuterTargetTask)
8601 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8602 Dependencies, KArgs.RTArgs,
8605 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8606 EmitTargetCallFallbackCB, KArgs,
8607 DeviceID, RTLoc, AllocaIP);
8610 Builder.restoreIP(AfterIP);
8617 if (!OutlinedFnID) {
8618 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8624 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8628 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8629 EmitTargetCallElse, AllocaIP));
// Top-level entry for an OpenMP "target" construct: outline the region body
// into a kernel function (emitTargetOutlinedFunction path) and, on the
// host, emit the launch sequence (emitTargetCall path) with if/nowait/
// depend and dynamic-cgroup-memory handling. Returns the post-construct
// insert point.
// NOTE(review): numbering gaps (8648->8656, 8657->8663) elide the call
// sites wrapping these argument lists — verify against upstream.
8632OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8633 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8634 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8635 TargetRegionEntryInfo &EntryInfo,
8636 const TargetKernelDefaultAttrs &DefaultAttrs,
8637 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8639 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8640 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8641 CustomMapperCallbackTy CustomMapperCB,
8645 if (!updateToLocation(
Loc))
8646 return InsertPointTy();
8648 Builder.restoreIP(CodeGenIP);
8656 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8657 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
// The kernel launch is only emitted on the host.
8663 if (!Config.isTargetDevice())
8665 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8666 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
8667 DynCGroupMemFallback);
8668 return Builder.saveIP();
8681 return OS.
str().str();
8686 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8687 Config.separator());
8692 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8694 assert(Elem.second->getValueType() == Ty &&
8695 "OMP internal variable has different type than requested");
8708 : M.getTargetTriple().isAMDGPU()
8710 :
DL.getDefaultGlobalsAddressSpace();
8719 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
8720 GV->setAlignment(std::max(TypeAlign, PtrAlign));
// Return (creating on first use) the module-level lock variable for a named
// critical region: "gomp_critical_user_<name>.var" of type
// KmpCriticalNameTy.
8727Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8728 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8729 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8730 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
// Compute the pointee size in bytes with the classic "GEP 1 from null,
// then ptrtoint" idiom (the null constant and the ptrtoint line are elided
// by the extraction — numbering jumps 8733->8738, 8738->8740).
8733Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8738 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8740 return SizePtrToInt;
8745 std::string VarName) {
8749 M, MaptypesArrayInit->
getType(),
8753 return MaptypesArrayGlobal;
// Allocate the three per-construct mapper argument arrays at AllocaIP
// (".offload_baseptrs", the pointers array — its alloca line is elided by
// the extraction — and ".offload_sizes"), then restore the location and
// record them in MapperAllocas for later GEP-based use.
8756void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8757 InsertPointTy AllocaIP,
8758 unsigned NumOperands,
8759 struct MapperAllocas &MapperAllocas) {
8760 if (!updateToLocation(
Loc))
8765 Builder.restoreIP(AllocaIP);
8767 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8771 ArrI64Ty,
nullptr,
".offload_sizes");
// Return the builder to the construct location after the allocas.
8772 updateToLocation(
Loc);
8773 MapperAllocas.ArgsBase = ArgsBase;
8774 MapperAllocas.Args =
Args;
8775 MapperAllocas.ArgSizes = ArgSizes;
// Emit a call to a tgt mapper runtime function: GEP to element 0 of each of
// the three mapper alloca arrays and pass {loc, device, count, baseptrs,
// ptrs, sizes, maptypes, mapnames, null} to MapperFunc.
8778void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8781 struct MapperAllocas &MapperAllocas,
8782 int64_t DeviceID,
unsigned NumOperands) {
8783 if (!updateToLocation(
Loc))
8788 Value *ArgsBaseGEP =
8789 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8790 {Builder.getInt32(0), Builder.getInt32(0)});
8792 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8793 {Builder.getInt32(0), Builder.getInt32(0)});
8794 Value *ArgSizesGEP =
8795 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8796 {Builder.getInt32(0), Builder.getInt32(0)});
8799 createRuntimeFunctionCall(MapperFunc, {SrcLocInfo, Builder.getInt64(DeviceID),
8800 Builder.getInt32(NumOperands),
8801 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
8802 MaptypesArg, MapnamesArg, NullPtr});
// Derive the runtime-call argument pointers (RTArgs) from the materialized
// offloading arrays in Info: GEP to element 0 of each array; for the map
// types, prefer MapTypesArrayEnd when building an end call. MapNames and
// Mappers arguments are only populated when debug info / a mapper is
// present (the null-fill lines for the early-out paths are elided by the
// extraction).
8805void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8806 TargetDataRTArgs &RTArgs,
8807 TargetDataInfo &
Info,
8809 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8810 "expected region end call to runtime only when end call is separate");
8812 auto VoidPtrTy = UnqualPtrTy;
8813 auto VoidPtrPtrTy = UnqualPtrTy;
8815 auto Int64PtrTy = UnqualPtrTy;
// No mapped pointers: leave the early-out path (elided here) to fill nulls.
8817 if (!
Info.NumberOfPtrs) {
8827 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8829 Info.RTArgs.BasePointersArray,
8831 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8835 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8838 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
// End calls may carry different map types (e.g. DELETE) in a second array.
8840 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8841 :
Info.RTArgs.MapTypesArray,
8847 if (!
Info.EmitDebug)
8850 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8856 if (!
Info.HasMapper)
8859 RTArgs.MappersArray =
8860 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
// For each non-contiguous map entry, build a per-dimension descriptor array
// of "struct.descriptor_dim" {offset, count, stride}, filled in reverse
// dimension order, and store its address into the corresponding slot of the
// pointers array.
8863void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8864 InsertPointTy CodeGenIP,
8865 MapInfosTy &CombinedInfo,
8866 TargetDataInfo &
Info) {
8867 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8868 CombinedInfo.NonContigInfo;
8881 "struct.descriptor_dim");
8883 enum { OffsetFD = 0, CountFD, StrideFD };
8887 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
// Dims == 1 marks a contiguous entry; no descriptor needed.
8890 if (NonContigInfo.Dims[
I] == 1)
8892 Builder.restoreIP(AllocaIP);
8895 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
8896 Builder.restoreIP(CodeGenIP);
// Dimensions are stored innermost-first, hence the reverse index.
8897 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
8898 unsigned RevIdx = EE -
II - 1;
8899 Value *DimsLVal = Builder.CreateInBoundsGEP(
8901 {Builder.getInt64(0), Builder.getInt64(II)});
8903 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
8904 Builder.CreateAlignedStore(
8905 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
8906 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
8908 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
8909 Builder.CreateAlignedStore(
8910 NonContigInfo.Counts[L][RevIdx], CountLVal,
8911 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8913 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
8914 Builder.CreateAlignedStore(
8915 NonContigInfo.Strides[L][RevIdx], StrideLVal,
// NOTE(review): alignment for the stride store is queried from
// CountLVal's type rather than StrideLVal's — harmless since both GEPs
// yield the same pointer type, but likely a copy-paste slip upstream.
8916 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
8919 Builder.restoreIP(CodeGenIP);
// Publish the descriptor address into the pointers array slot I.
8920 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
8921 DimsAddr, Builder.getPtrTy());
8922 Value *
P = Builder.CreateConstInBoundsGEP2_32(
8924 Info.RTArgs.PointersArray, 0,
I);
8925 Builder.CreateAlignedStore(
8926 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
// Emit the "init whole array" / "delete whole array" prologue of a
// user-defined mapper: decide whether the entire array should be pushed as
// one mapper component (size > 1, or base != begin with PTR_AND_OBJ set),
// gated by whether OMP_MAP_DELETE matches the IsInit/IsDelete mode, then
// push {handle, base, begin, size*elemsize, adjusted map type, name} via
// __tgt_push_mapper_component. The adjusted map type drops TO|FROM (the
// runtime infers direction) and adds IMPLICIT.
// NOTE(review): several condition/branch lines are elided by the extraction
// (numbering gaps 8931->8939, 8946->8951, 8963->8966) — verify against
// upstream before editing.
8931void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
8939 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
// An "array" for mapping purposes has more than one element.
8941 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
8942 Value *DeleteBit = Builder.CreateAnd(
8945 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8946 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
// base != begin together with PTR_AND_OBJ also forces whole-array handling.
8951 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
8953 Value *PtrAndObjBit = Builder.CreateAnd(
8956 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8957 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
8958 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
8959 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
8960 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
// Init mode requires the DELETE bit clear; delete mode requires it set.
8961 DeleteCond = Builder.CreateIsNull(
8963 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8966 DeleteCond = Builder.CreateIsNotNull(
8968 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
8970 Cond = Builder.CreateAnd(
Cond, DeleteCond);
8971 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
8973 emitBlock(BodyBB, MapperFn);
// Total byte size of the array component.
8976 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
// Strip TO|FROM and add IMPLICIT for the pushed component's map type.
8979 Value *MapTypeArg = Builder.CreateAnd(
8982 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8983 OpenMPOffloadMappingFlags::OMP_MAP_TO |
8984 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
8985 MapTypeArg = Builder.CreateOr(
8988 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
8989 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
8993 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
8994 ArraySize, MapTypeArg, MapName};
8995 createRuntimeFunctionCall(
8996 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
9004 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
9020 MapperFn->
addFnAttr(Attribute::NoInline);
9021 MapperFn->
addFnAttr(Attribute::NoUnwind);
9031 auto SavedIP = Builder.saveIP();
9032 Builder.SetInsertPoint(EntryBB);
9044 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
9045 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
9046 Value *PtrBegin = BeginIn;
9047 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
9052 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9053 MapType, MapName, ElementSize, HeadBB,
9059 emitBlock(HeadBB, MapperFn);
9064 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9065 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9068 emitBlock(BodyBB, MapperFn);
9071 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9075 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
9077 return Info.takeError();
9081 Value *OffloadingArgs[] = {MapperHandle};
9082 Value *PreviousSize = createRuntimeFunctionCall(
9083 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
9085 Value *ShiftedPreviousSize =
9086 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
9089 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
9098 Value *OriMapType = Builder.getInt64(
9099 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9101 Value *MemberMapType =
9102 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9116 Value *LeftToFrom = Builder.CreateAnd(
9119 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9120 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9121 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9130 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
9131 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9133 emitBlock(AllocBB, MapperFn);
9134 Value *AllocMapType = Builder.CreateAnd(
9137 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9138 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9139 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9140 Builder.CreateBr(EndBB);
9141 emitBlock(AllocElseBB, MapperFn);
9142 Value *IsTo = Builder.CreateICmpEQ(
9145 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9146 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9147 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9149 emitBlock(ToBB, MapperFn);
9150 Value *ToMapType = Builder.CreateAnd(
9153 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9154 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9155 Builder.CreateBr(EndBB);
9156 emitBlock(ToElseBB, MapperFn);
9157 Value *IsFrom = Builder.CreateICmpEQ(
9160 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9161 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9162 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9164 emitBlock(FromBB, MapperFn);
9165 Value *FromMapType = Builder.CreateAnd(
9168 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9169 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9171 emitBlock(EndBB, MapperFn);
9174 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
9180 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9181 CurSizeArg, CurMapType, CurNameArg};
9183 auto ChildMapperFn = CustomMapperCB(
I);
9185 return ChildMapperFn.takeError();
9186 if (*ChildMapperFn) {
9188 createRuntimeFunctionCall(*ChildMapperFn, OffloadingArgs)
9189 ->setDoesNotThrow();
9193 createRuntimeFunctionCall(
9194 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
9201 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9202 "omp.arraymap.next");
9204 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
9206 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9208 emitBlock(ExitBB, MapperFn);
9211 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9212 MapType, MapName, ElementSize, DoneBB,
9216 emitBlock(DoneBB, MapperFn,
true);
9218 Builder.CreateRetVoid();
9219 Builder.restoreIP(SavedIP);
9223Error OpenMPIRBuilder::emitOffloadingArrays(
9224 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
9225 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
9226 bool IsNonContiguous,
9230 Info.clearArrayInfo();
9231 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9233 if (
Info.NumberOfPtrs == 0)
9236 Builder.restoreIP(AllocaIP);
9242 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9243 PointerArrayType,
nullptr,
".offload_baseptrs");
9245 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9246 PointerArrayType,
nullptr,
".offload_ptrs");
9247 AllocaInst *MappersArray = Builder.CreateAlloca(
9248 PointerArrayType,
nullptr,
".offload_mappers");
9249 Info.RTArgs.MappersArray = MappersArray;
9256 ConstantInt::get(Int64Ty, 0));
9258 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9261 if (IsNonContiguous &&
9262 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9263 CombinedInfo.Types[
I] &
9264 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9266 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9272 RuntimeSizes.set(
I);
9275 if (RuntimeSizes.all()) {
9277 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9278 SizeArrayType,
nullptr,
".offload_sizes");
9283 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9284 auto *SizesArrayGbl =
9289 if (!RuntimeSizes.any()) {
9290 Info.RTArgs.SizesArray = SizesArrayGbl;
9292 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9293 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9296 SizeArrayType,
nullptr,
".offload_sizes");
9299 Builder.CreateMemCpy(
9300 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9301 SizesArrayGbl, OffloadSizeAlign,
9306 Info.RTArgs.SizesArray = Buffer;
9314 for (
auto mapFlag : CombinedInfo.Types)
9316 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9318 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9319 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9320 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9323 if (!CombinedInfo.Names.empty()) {
9324 auto *MapNamesArrayGbl = createOffloadMapnames(
9325 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9326 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9327 Info.EmitDebug =
true;
9329 Info.RTArgs.MapNamesArray =
9331 Info.EmitDebug =
false;
9336 if (
Info.separateBeginEndCalls()) {
9337 bool EndMapTypesDiffer =
false;
9339 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9340 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9341 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9342 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9343 EndMapTypesDiffer =
true;
9346 if (EndMapTypesDiffer) {
9347 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9348 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9353 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9354 Value *BPVal = CombinedInfo.BasePointers[
I];
9355 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9358 Builder.CreateAlignedStore(BPVal, BP,
9359 M.getDataLayout().getPrefTypeAlign(PtrTy));
9361 if (
Info.requiresDevicePointerInfo()) {
9362 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9363 CodeGenIP = Builder.saveIP();
9364 Builder.restoreIP(AllocaIP);
9365 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9366 Builder.restoreIP(CodeGenIP);
9368 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9369 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9370 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9372 DeviceAddrCB(
I, BP);
9376 Value *PVal = CombinedInfo.Pointers[
I];
9377 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9381 Builder.CreateAlignedStore(PVal,
P,
9382 M.getDataLayout().getPrefTypeAlign(PtrTy));
9384 if (RuntimeSizes.test(
I)) {
9385 Value *S = Builder.CreateConstInBoundsGEP2_32(
9389 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9392 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9395 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9398 auto CustomMFunc = CustomMapperCB(
I);
9400 return CustomMFunc.takeError();
9402 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9404 Value *MAddr = Builder.CreateInBoundsGEP(
9406 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9407 Builder.CreateAlignedStore(
9408 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9411 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9412 Info.NumberOfPtrs == 0)
9414 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9419 BasicBlock *CurBB = Builder.GetInsertBlock();
9426 Builder.CreateBr(
Target);
9429 Builder.ClearInsertionPoint();
9434 BasicBlock *CurBB = Builder.GetInsertBlock();
9450 Builder.SetInsertPoint(BB);
9453Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9454 BodyGenCallbackTy ElseGen,
9455 InsertPointTy AllocaIP) {
9459 auto CondConstant = CI->getSExtValue();
9461 return ThenGen(AllocaIP, Builder.saveIP());
9463 return ElseGen(AllocaIP, Builder.saveIP());
9473 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9475 emitBlock(ThenBlock, CurFn);
9476 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9478 emitBranch(ContBlock);
9481 emitBlock(ElseBlock, CurFn);
9482 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9485 emitBranch(ContBlock);
9487 emitBlock(ContBlock, CurFn,
true);
9491bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9495 "Unexpected Atomic Ordering.");
9552OpenMPIRBuilder::InsertPointTy
9553OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9554 AtomicOpValue &
X, AtomicOpValue &V,
9556 if (!updateToLocation(
Loc))
9559 assert(
X.Var->getType()->isPointerTy() &&
9560 "OMP Atomic expects a pointer to target memory");
9561 Type *XElemTy =
X.ElemTy;
9564 "OMP atomic read expected a scalar type");
9566 Value *XRead =
nullptr;
9570 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9576 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9579 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9580 OpenMPIRBuilder::AtomicInfo atomicInfo(
9581 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9582 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9583 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9584 XRead = AtomicLoadRes.first;
9591 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9594 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9596 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9599 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9600 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9601 return Builder.saveIP();
9604OpenMPIRBuilder::InsertPointTy
9605OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9606 AtomicOpValue &
X,
Value *Expr,
9608 if (!updateToLocation(
Loc))
9611 assert(
X.Var->getType()->isPointerTy() &&
9612 "OMP Atomic expects a pointer to target memory");
9613 Type *XElemTy =
X.ElemTy;
9616 "OMP atomic write expected a scalar type");
9619 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9622 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9624 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9625 OpenMPIRBuilder::AtomicInfo atomicInfo(
9626 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9627 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9628 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9635 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9636 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9640 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9641 return Builder.saveIP();
9644OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9645 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9647 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9648 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9650 if (!updateToLocation(
Loc))
9654 Type *XTy =
X.Var->getType();
9656 "OMP Atomic expects a pointer to target memory");
9657 Type *XElemTy =
X.ElemTy;
9660 "OMP atomic update expected a scalar type");
9663 "OpenMP atomic does not support LT or GT operations");
9667 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9668 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9670 return AtomicResult.takeError();
9671 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9672 return Builder.saveIP();
9676Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9680 return Builder.CreateAdd(Src1, Src2);
9682 return Builder.CreateSub(Src1, Src2);
9684 return Builder.CreateAnd(Src1, Src2);
9686 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9688 return Builder.CreateOr(Src1, Src2);
9690 return Builder.CreateXor(Src1, Src2);
9715 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9716 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9719 bool emitRMWOp =
false;
9727 emitRMWOp = XElemTy;
9730 emitRMWOp = (IsXBinopExpr && XElemTy);
9737 std::pair<Value *, Value *> Res;
9742 if (IsIgnoreDenormalMode)
9743 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9745 if (!IsFineGrainedMemory)
9746 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9748 if (!IsRemoteMemory)
9752 Res.first = RMWInst;
9757 Res.second = Res.first;
9759 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9763 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9769 OpenMPIRBuilder::AtomicInfo atomicInfo(
9770 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9771 OldVal->
getAlign(),
true , AllocaIP,
X);
9772 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9773 BasicBlock *CurBB = Builder.GetInsertBlock();
9775 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9779 X->getName() +
".atomic.cont");
9781 Builder.restoreIP(AllocaIP);
9782 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9783 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9784 Builder.SetInsertPoint(ContBB);
9786 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9791 Value *Upd = *CBResult;
9792 Builder.CreateStore(Upd, NewAtomicAddr);
9795 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9796 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9798 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9799 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9801 Res.first = OldExprVal;
9807 Builder.SetInsertPoint(ExitBB);
9809 Builder.SetInsertPoint(ExitTI);
9815 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9822 BasicBlock *CurBB = Builder.GetInsertBlock();
9824 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9828 X->getName() +
".atomic.cont");
9830 Builder.restoreIP(AllocaIP);
9831 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9832 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9833 Builder.SetInsertPoint(ContBB);
9835 PHI->addIncoming(OldVal, CurBB);
9840 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9841 X->getName() +
".atomic.fltCast");
9843 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9844 X->getName() +
".atomic.ptrCast");
9851 Value *Upd = *CBResult;
9852 Builder.CreateStore(Upd, NewAtomicAddr);
9853 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9858 Result->setVolatile(VolatileX);
9859 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9860 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9861 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9862 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9864 Res.first = OldExprVal;
9871 Builder.SetInsertPoint(ExitBB);
9873 Builder.SetInsertPoint(ExitTI);
9880OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9881 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9884 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9885 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9886 if (!updateToLocation(
Loc))
9890 Type *XTy =
X.Var->getType();
9892 "OMP Atomic expects a pointer to target memory");
9893 Type *XElemTy =
X.ElemTy;
9896 "OMP atomic capture expected a scalar type");
9898 "OpenMP atomic does not support LT or GT operations");
9905 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
9906 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9909 Value *CapturedVal =
9910 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
9911 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
9913 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
9914 return Builder.saveIP();
9917OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9918 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9924 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
9925 IsPostfixUpdate, IsFailOnly, Failure);
9928OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
9929 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
9934 if (!updateToLocation(
Loc))
9937 assert(
X.Var->getType()->isPointerTy() &&
9938 "OMP atomic expects a pointer to target memory");
9941 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
9942 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
9945 bool IsInteger =
E->getType()->isIntegerTy();
9947 if (
Op == OMPAtomicCompareOp::EQ) {
9952 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
9953 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
9958 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
9962 Value *OldValue = Builder.CreateExtractValue(Result, 0);
9964 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
9966 "OldValue and V must be of same type");
9967 if (IsPostfixUpdate) {
9968 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
9970 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
9981 BasicBlock *CurBB = Builder.GetInsertBlock();
9983 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9985 CurBBTI,
X.Var->getName() +
".atomic.exit");
9991 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
9993 Builder.SetInsertPoint(ContBB);
9994 Builder.CreateStore(OldValue, V.Var);
9995 Builder.CreateBr(ExitBB);
10000 Builder.SetInsertPoint(ExitBB);
10002 Builder.SetInsertPoint(ExitTI);
10005 Value *CapturedValue =
10006 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
10007 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10013 assert(
R.Var->getType()->isPointerTy() &&
10014 "r.var must be of pointer type");
10015 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10017 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
10018 Value *ResultCast =
R.IsSigned
10019 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
10020 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
10021 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
10024 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10025 "Op should be either max or min at this point");
10026 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
10037 if (IsXBinopExpr) {
10064 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
10066 Value *CapturedValue =
nullptr;
10067 if (IsPostfixUpdate) {
10068 CapturedValue = OldValue;
10093 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
10094 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
10096 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10100 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10102 return Builder.saveIP();
10105OpenMPIRBuilder::InsertPointOrErrorTy
10106OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
10107 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
10110 if (!updateToLocation(
Loc))
10111 return InsertPointTy();
10114 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
10115 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
10120 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
10121 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
10122 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10142 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
10143 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
10145 splitBB(Builder,
true,
"teams.alloca");
10147 bool SubClausesPresent =
10148 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10150 if (!Config.isTargetDevice() && SubClausesPresent) {
10151 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10152 "if lowerbound is non-null, then upperbound must also be non-null "
10153 "for bounds on num_teams");
10155 if (NumTeamsUpper ==
nullptr)
10156 NumTeamsUpper = Builder.getInt32(0);
10158 if (NumTeamsLower ==
nullptr)
10159 NumTeamsLower = NumTeamsUpper;
10163 "argument to if clause must be an integer value");
10167 IfExpr = Builder.CreateICmpNE(IfExpr,
10168 ConstantInt::get(IfExpr->
getType(), 0));
10169 NumTeamsUpper = Builder.CreateSelect(
10170 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
10173 NumTeamsLower = Builder.CreateSelect(
10174 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
10177 if (ThreadLimit ==
nullptr)
10178 ThreadLimit = Builder.getInt32(0);
10180 Value *ThreadNum = getOrCreateThreadID(Ident);
10181 createRuntimeFunctionCall(
10182 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
10183 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
10186 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10187 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10188 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10192 OI.EntryBB = AllocaBB;
10193 OI.ExitBB = ExitBB;
10194 OI.OuterAllocaBB = &OuterAllocaBB;
10198 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
10200 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
10202 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
10204 auto HostPostOutlineCB = [
this, Ident,
10205 ToBeDeleted](
Function &OutlinedFn)
mutable {
10210 "there must be a single user for the outlined function");
10215 "Outlined function must have two or three arguments only");
10217 bool HasShared = OutlinedFn.
arg_size() == 3;
10225 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
10226 "outlined function.");
10227 Builder.SetInsertPoint(StaleCI);
10229 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
10232 createRuntimeFunctionCall(
10233 getOrCreateRuntimeFunctionPtr(
10234 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10238 I->eraseFromParent();
10241 if (!Config.isTargetDevice())
10242 OI.PostOutlineCB = HostPostOutlineCB;
10244 addOutlineInfo(std::move(OI));
10246 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10248 return Builder.saveIP();
10251OpenMPIRBuilder::InsertPointOrErrorTy
10252OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10253 InsertPointTy OuterAllocaIP,
10254 BodyGenCallbackTy BodyGenCB) {
10255 if (!updateToLocation(
Loc))
10256 return InsertPointTy();
10258 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10260 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10262 splitBB(Builder,
true,
"distribute.entry");
10263 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10266 splitBB(Builder,
true,
"distribute.exit");
10268 splitBB(Builder,
true,
"distribute.body");
10270 splitBB(Builder,
true,
"distribute.alloca");
10273 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10274 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10275 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10280 if (Config.isTargetDevice()) {
10282 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10283 OI.EntryBB = AllocaBB;
10284 OI.ExitBB = ExitBB;
10286 addOutlineInfo(std::move(OI));
10288 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10290 return Builder.saveIP();
10295 std::string VarName) {
10301 M, MapNamesArrayInit->
getType(),
10304 return MapNamesArrayGlobal;
10309void OpenMPIRBuilder::initializeTypes(
Module &M) {
10312 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10313 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
10314#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10315#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10316 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10317 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10318#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10319 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10320 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
10321#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10322 T = StructType::getTypeByName(Ctx, StructName); \
10324 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10326 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10327#include "llvm/Frontend/OpenMP/OMPKinds.def"
10330void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10338 while (!Worklist.
empty()) {
10342 if (
BlockSet.insert(SuccBB).second)
10351 if (!Config.isGPU()) {
10366 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10367 Fn->
addFnAttr(Attribute::MustProgress);
10371void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10372 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10375 if (OffloadInfoManager.empty())
10379 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10380 TargetRegionEntryInfo>,
10382 OrderedEntries(OffloadInfoManager.size());
10385 auto &&GetMDInt = [
this](
unsigned V) {
10392 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10393 auto &&TargetRegionMetadataEmitter =
10394 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10395 const TargetRegionEntryInfo &EntryInfo,
10396 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10409 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10410 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10411 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10412 GetMDInt(
E.getOrder())};
10415 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10421 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10424 auto &&DeviceGlobalVarMetadataEmitter =
10425 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10427 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10435 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10436 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10439 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10440 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10446 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10447 DeviceGlobalVarMetadataEmitter);
10449 for (
const auto &
E : OrderedEntries) {
10450 assert(
E.first &&
"All ordered entries must exist!");
10451 if (
const auto *CE =
10454 if (!
CE->getID() || !
CE->getAddress()) {
10456 TargetRegionEntryInfo EntryInfo =
E.second;
10457 StringRef FnName = EntryInfo.ParentName;
10458 if (!M.getNamedValue(FnName))
10460 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10463 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10466 }
else if (
const auto *CE =
dyn_cast<
10467 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10469 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10470 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10473 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10474 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10475 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10477 if (!
CE->getAddress()) {
10478 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10482 if (
CE->getVarSize() == 0)
10485 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10486 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10487 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10488 "Declaret target link address is set.");
10489 if (Config.isTargetDevice())
10491 if (!
CE->getAddress()) {
10492 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10504 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10505 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10510 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10511 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10512 Flags,
CE->getLinkage(),
CE->getVarName());
10514 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10515 Flags,
CE->getLinkage());
10526 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10531 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10532 Config.getRequiresFlags());
10535void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10537 unsigned FileID,
unsigned Line,
unsigned Count) {
10539 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10540 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10542 OS <<
"_" <<
Count;
10545void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10547 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10548 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10549 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10550 EntryInfo.Line, NewCount);
10553TargetRegionEntryInfo
10554OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10558 auto FileIDInfo = CallBack();
10562 FileID =
Status->getUniqueID().getFile();
10566 FileID =
hash_value(std::get<0>(FileIDInfo));
10569 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10570 std::get<1>(FileIDInfo));
10573unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10576 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10578 !(Remain & 1); Remain = Remain >> 1)
10584OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10587 << getFlagMemberOffset());
10590void OpenMPIRBuilder::setCorrectMemberOfFlag(
10596 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10598 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10605 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10606 Flags |= MemberOfFlag;
10609Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10610 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10611 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10612 bool IsDeclaration,
bool IsExternallyVisible,
10613 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10614 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10615 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10616 std::function<
Constant *()> GlobalInitializer,
10623 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10624 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10626 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10627 Config.hasRequiresUnifiedSharedMemory())) {
10632 if (!IsExternallyVisible)
10633 OS <<
format(
"_%x", EntryInfo.FileID);
10634 OS <<
"_decl_tgt_ref_ptr";
10637 Value *Ptr = M.getNamedValue(PtrName);
10641 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10646 if (!Config.isTargetDevice()) {
10647 if (GlobalInitializer)
10648 GV->setInitializer(GlobalInitializer());
10653 registerTargetGlobalVariable(
10654 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10655 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10656 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
10665void OpenMPIRBuilder::registerTargetGlobalVariable(
10666 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10667 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10668 bool IsDeclaration,
bool IsExternallyVisible,
10669 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10670 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10671 std::vector<Triple> TargetTriple,
10672 std::function<
Constant *()> GlobalInitializer,
10675 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10676 (TargetTriple.empty() && !Config.isTargetDevice()))
10679 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10684 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10686 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10687 !Config.hasRequiresUnifiedSharedMemory()) {
10688 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10692 if (!IsDeclaration)
10694 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10701 if (Config.isTargetDevice() &&
10705 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10708 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10710 if (!M.getNamedValue(RefName)) {
10712 getOrCreateInternalVariable(Addr->
getType(), RefName);
10714 GvAddrRef->setConstant(
true);
10716 GvAddrRef->setInitializer(Addr);
10717 GeneratedRefs.push_back(GvAddrRef);
10721 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10722 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10724 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10726 if (Config.isTargetDevice()) {
10730 Addr = getAddrOfDeclareTargetVar(
10731 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10732 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10733 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10736 VarSize = M.getDataLayout().getPointerSize();
10740 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10746void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10750 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10755 auto &&GetMDInt = [MN](
unsigned Idx) {
10760 auto &&GetMDString = [MN](
unsigned Idx) {
10762 return V->getString();
10765 switch (GetMDInt(0)) {
10769 case OffloadEntriesInfoManager::OffloadEntryInfo::
10770 OffloadingEntryInfoTargetRegion: {
10771 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10776 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10780 case OffloadEntriesInfoManager::OffloadEntryInfo::
10781 OffloadingEntryInfoDeviceGlobalVar:
10782 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10784 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10794 if (HostFilePath.
empty())
10798 if (std::error_code Err = Buf.getError()) {
10800 "OpenMPIRBuilder: " +
10808 if (std::error_code Err = M.getError()) {
10810 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10814 loadOffloadInfoMetadata(*M.get());
10821bool OffloadEntriesInfoManager::empty()
const {
10822 return OffloadEntriesTargetRegion.empty() &&
10823 OffloadEntriesDeviceGlobalVar.empty();
10826unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10827 const TargetRegionEntryInfo &EntryInfo)
const {
10828 auto It = OffloadEntriesTargetRegionCount.find(
10829 getTargetRegionEntryCountKey(EntryInfo));
10830 if (It == OffloadEntriesTargetRegionCount.end())
10835void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10836 const TargetRegionEntryInfo &EntryInfo) {
10837 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10838 EntryInfo.Count + 1;
10842void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10843 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10844 OffloadEntriesTargetRegion[EntryInfo] =
10845 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10846 OMPTargetRegionEntryTargetRegion);
10847 ++OffloadingEntriesNum;
10850void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10852 OMPTargetRegionEntryKind Flags) {
10853 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10856 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10860 if (OMPBuilder->Config.isTargetDevice()) {
10862 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10865 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10866 Entry.setAddress(Addr);
10868 Entry.setFlags(Flags);
10870 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10871 hasTargetRegionEntryInfo(EntryInfo,
true))
10873 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10874 "Target region entry already registered!");
10875 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10876 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10877 ++OffloadingEntriesNum;
10879 incrementTargetRegionEntryInfoCount(EntryInfo);
10882bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10883 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10886 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10888 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10889 if (It == OffloadEntriesTargetRegion.end()) {
10893 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
10898void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
10899 const OffloadTargetRegionEntryInfoActTy &Action) {
10901 for (
const auto &It : OffloadEntriesTargetRegion) {
10902 Action(It.first, It.second);
10906void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
10907 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
10908 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
10909 ++OffloadingEntriesNum;
10912void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
10915 if (OMPBuilder->Config.isTargetDevice()) {
10917 if (!hasDeviceGlobalVarEntryInfo(VarName))
10919 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10920 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
10921 if (Entry.getVarSize() == 0) {
10922 Entry.setVarSize(VarSize);
10927 Entry.setVarSize(VarSize);
10929 Entry.setAddress(Addr);
10931 if (hasDeviceGlobalVarEntryInfo(VarName)) {
10932 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
10933 assert(Entry.isValid() && Entry.getFlags() == Flags &&
10934 "Entry not initialized!");
10935 if (Entry.getVarSize() == 0) {
10936 Entry.setVarSize(VarSize);
10941 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10942 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
10943 Addr, VarSize, Flags,
Linkage,
10946 OffloadEntriesDeviceGlobalVar.try_emplace(
10947 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
10948 ++OffloadingEntriesNum;
10952void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
10953 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
10955 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
10956 Action(
E.getKey(),
E.getValue());
10963void CanonicalLoopInfo::collectControlBlocks(
10970 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
10973BasicBlock *CanonicalLoopInfo::getPreheader()
const {
10982void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
10994void CanonicalLoopInfo::mapIndVar(
11004 for (
Use &U : OldIV->
uses()) {
11008 if (
User->getParent() == getCond())
11010 if (
User->getParent() == getLatch())
11016 Value *NewIV = Updater(OldIV);
11019 for (
Use *U : ReplacableUses)
11027void CanonicalLoopInfo::assertOK()
const {
11040 "Preheader must terminate with unconditional branch");
11042 "Preheader must jump to header");
11046 "Header must terminate with unconditional branch");
11047 assert(Header->getSingleSuccessor() ==
Cond &&
11048 "Header must jump to exiting block");
11051 assert(
Cond->getSinglePredecessor() == Header &&
11052 "Exiting block only reachable from header");
11055 "Exiting block must terminate with conditional branch");
11057 "Exiting block must have two successors");
11059 "Exiting block's first successor jump to the body");
11061 "Exiting block's second successor must exit the loop");
11065 "Body only reachable from exiting block");
11070 "Latch must terminate with unconditional branch");
11079 "Exit block must terminate with unconditional branch");
11080 assert(
Exit->getSingleSuccessor() == After &&
11081 "Exit block must jump to after block");
11085 "After block only reachable from exit block");
11089 assert(IndVar &&
"Canonical induction variable not found?");
11091 "Induction variable must be an integer");
11093 "Induction variable must be a PHI in the loop header");
11099 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
11106 Value *TripCount = getTripCount();
11107 assert(TripCount &&
"Loop trip count not found?");
11109 "Trip count and induction variable must have the same type");
11113 "Exit condition must be a signed less-than comparison");
11115 "Exit condition must compare the induction variable");
11117 "Exit condition must compare with the trip count");
11121void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
unsigned getNumSuccessors() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
void setSuccessor(unsigned idx, BasicBlock *NewSucc)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return true if there is exactly one unique user of this value that cannot be dropped (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
@ Null
Return null pointer.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...