66#define DEBUG_TYPE "openmp-ir-builder"
73 cl::desc(
"Use optimistic attributes describing "
74 "'as-if' properties of runtime calls."),
78 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
79 cl::desc(
"Factor for the unroll threshold to account for code "
80 "simplifications still taking place"),
91 if (!IP1.isSet() || !IP2.isSet())
93 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
98 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
99 case OMPScheduleType::UnorderedStaticChunked:
100 case OMPScheduleType::UnorderedStatic:
101 case OMPScheduleType::UnorderedDynamicChunked:
102 case OMPScheduleType::UnorderedGuidedChunked:
103 case OMPScheduleType::UnorderedRuntime:
104 case OMPScheduleType::UnorderedAuto:
105 case OMPScheduleType::UnorderedTrapezoidal:
106 case OMPScheduleType::UnorderedGreedy:
107 case OMPScheduleType::UnorderedBalanced:
108 case OMPScheduleType::UnorderedGuidedIterativeChunked:
109 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
110 case OMPScheduleType::UnorderedSteal:
111 case OMPScheduleType::UnorderedStaticBalancedChunked:
112 case OMPScheduleType::UnorderedGuidedSimd:
113 case OMPScheduleType::UnorderedRuntimeSimd:
114 case OMPScheduleType::OrderedStaticChunked:
115 case OMPScheduleType::OrderedStatic:
116 case OMPScheduleType::OrderedDynamicChunked:
117 case OMPScheduleType::OrderedGuidedChunked:
118 case OMPScheduleType::OrderedRuntime:
119 case OMPScheduleType::OrderedAuto:
120 case OMPScheduleType::OrderdTrapezoidal:
121 case OMPScheduleType::NomergeUnorderedStaticChunked:
122 case OMPScheduleType::NomergeUnorderedStatic:
123 case OMPScheduleType::NomergeUnorderedDynamicChunked:
124 case OMPScheduleType::NomergeUnorderedGuidedChunked:
125 case OMPScheduleType::NomergeUnorderedRuntime:
126 case OMPScheduleType::NomergeUnorderedAuto:
127 case OMPScheduleType::NomergeUnorderedTrapezoidal:
128 case OMPScheduleType::NomergeUnorderedGreedy:
129 case OMPScheduleType::NomergeUnorderedBalanced:
130 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
131 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
132 case OMPScheduleType::NomergeUnorderedSteal:
133 case OMPScheduleType::NomergeOrderedStaticChunked:
134 case OMPScheduleType::NomergeOrderedStatic:
135 case OMPScheduleType::NomergeOrderedDynamicChunked:
136 case OMPScheduleType::NomergeOrderedGuidedChunked:
137 case OMPScheduleType::NomergeOrderedRuntime:
138 case OMPScheduleType::NomergeOrderedAuto:
139 case OMPScheduleType::NomergeOrderedTrapezoidal:
140 case OMPScheduleType::OrderedDistributeChunked:
141 case OMPScheduleType::OrderedDistribute:
149 SchedType & OMPScheduleType::MonotonicityMask;
150 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
162 Builder.restoreIP(IP);
172 Kernel->getFnAttribute(
"target-features").getValueAsString();
173 if (Features.
count(
"+wavefrontsize64"))
188 bool HasSimdModifier,
bool HasDistScheduleChunks) {
190 switch (ClauseKind) {
191 case OMP_SCHEDULE_Default:
192 case OMP_SCHEDULE_Static:
193 return HasChunks ? OMPScheduleType::BaseStaticChunked
194 : OMPScheduleType::BaseStatic;
195 case OMP_SCHEDULE_Dynamic:
196 return OMPScheduleType::BaseDynamicChunked;
197 case OMP_SCHEDULE_Guided:
198 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
199 : OMPScheduleType::BaseGuidedChunked;
200 case OMP_SCHEDULE_Auto:
202 case OMP_SCHEDULE_Runtime:
203 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
204 : OMPScheduleType::BaseRuntime;
205 case OMP_SCHEDULE_Distribute:
206 return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
207 : OMPScheduleType::BaseDistribute;
215 bool HasOrderedClause) {
216 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
217 OMPScheduleType::None &&
218 "Must not have ordering nor monotonicity flags already set");
221 ? OMPScheduleType::ModifierOrdered
222 : OMPScheduleType::ModifierUnordered;
223 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
226 if (OrderingScheduleType ==
227 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
228 return OMPScheduleType::OrderedGuidedChunked;
229 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
230 OMPScheduleType::ModifierOrdered))
231 return OMPScheduleType::OrderedRuntime;
233 return OrderingScheduleType;
239 bool HasSimdModifier,
bool HasMonotonic,
240 bool HasNonmonotonic,
bool HasOrderedClause) {
241 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
242 OMPScheduleType::None &&
243 "Must not have monotonicity flags already set");
244 assert((!HasMonotonic || !HasNonmonotonic) &&
245 "Monotonic and Nonmonotonic are contradicting each other");
248 return ScheduleType | OMPScheduleType::ModifierMonotonic;
249 }
else if (HasNonmonotonic) {
250 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
260 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
261 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
267 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
275 bool HasSimdModifier,
bool HasMonotonicModifier,
276 bool HasNonmonotonicModifier,
bool HasOrderedClause,
277 bool HasDistScheduleChunks) {
279 ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
283 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
284 HasNonmonotonicModifier, HasOrderedClause);
299 assert(!Br->isConditional() &&
300 "BB's terminator must be an unconditional branch (or degenerate)");
303 Br->setSuccessor(0,
Target);
308 NewBr->setDebugLoc(
DL);
314 "Target BB must not have PHI nodes");
334 NewBr->setDebugLoc(
DL);
342 spliceBB(Builder.saveIP(), New, CreateBranch,
DebugLoc);
346 Builder.SetInsertPoint(Old);
350 Builder.SetCurrentDebugLocation(
DebugLoc);
359 spliceBB(IP, New, CreateBranch,
DL);
360 New->replaceSuccessorsPhiUsesWith(Old, New);
369 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
371 Builder.SetInsertPoint(Builder.GetInsertBlock());
374 Builder.SetCurrentDebugLocation(
DebugLoc);
383 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
385 Builder.SetInsertPoint(Builder.GetInsertBlock());
388 Builder.SetCurrentDebugLocation(
DebugLoc);
395 return splitBB(Builder, CreateBranch, Old->
getName() + Suffix);
402 OpenMPIRBuilder::InsertPointTy OuterAllocaIP,
404 OpenMPIRBuilder::InsertPointTy InnerAllocaIP,
405 const Twine &Name =
"",
bool AsPtr =
true) {
406 Builder.restoreIP(OuterAllocaIP);
409 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr, Name +
".addr");
413 FakeVal = FakeValAddr;
416 Builder.CreateLoad(Builder.getInt32Ty(), FakeValAddr, Name +
".val");
421 Builder.restoreIP(InnerAllocaIP);
425 Builder.CreateLoad(Builder.getInt32Ty(), FakeVal, Name +
".use");
441enum OpenMPOffloadingRequiresDirFlags {
443 OMP_REQ_UNDEFINED = 0x000,
445 OMP_REQ_NONE = 0x001,
447 OMP_REQ_REVERSE_OFFLOAD = 0x002,
449 OMP_REQ_UNIFIED_ADDRESS = 0x004,
451 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
453 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
459OpenMPIRBuilderConfig::OpenMPIRBuilderConfig()
460 : RequiresFlags(OMP_REQ_UNDEFINED) {}
462OpenMPIRBuilderConfig::OpenMPIRBuilderConfig(
463 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
464 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
465 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
466 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
467 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
468 RequiresFlags(OMP_REQ_UNDEFINED) {
469 if (HasRequiresReverseOffload)
470 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
471 if (HasRequiresUnifiedAddress)
472 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
473 if (HasRequiresUnifiedSharedMemory)
474 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
475 if (HasRequiresDynamicAllocators)
476 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
479bool OpenMPIRBuilderConfig::hasRequiresReverseOffload()
const {
480 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
483bool OpenMPIRBuilderConfig::hasRequiresUnifiedAddress()
const {
484 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
487bool OpenMPIRBuilderConfig::hasRequiresUnifiedSharedMemory()
const {
488 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
491bool OpenMPIRBuilderConfig::hasRequiresDynamicAllocators()
const {
492 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
495int64_t OpenMPIRBuilderConfig::getRequiresFlags()
const {
496 return hasRequiresFlags() ? RequiresFlags
497 :
static_cast<int64_t
>(OMP_REQ_NONE);
500void OpenMPIRBuilderConfig::setHasRequiresReverseOffload(
bool Value) {
502 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
504 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
507void OpenMPIRBuilderConfig::setHasRequiresUnifiedAddress(
bool Value) {
509 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
511 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
514void OpenMPIRBuilderConfig::setHasRequiresUnifiedSharedMemory(
bool Value) {
516 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
518 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
521void OpenMPIRBuilderConfig::setHasRequiresDynamicAllocators(
bool Value) {
523 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
525 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
532void OpenMPIRBuilder::getKernelArgsVector(TargetKernelArgs &KernelArgs,
536 Value *PointerNum = Builder.getInt32(KernelArgs.NumTargetItems);
538 constexpr size_t MaxDim = 3;
541 Value *HasNoWaitFlag = Builder.getInt64(KernelArgs.HasNoWait);
543 Value *DynCGroupMemFallbackFlag =
544 Builder.getInt64(
static_cast<uint64_t>(KernelArgs.DynCGroupMemFallback));
545 DynCGroupMemFallbackFlag = Builder.CreateShl(DynCGroupMemFallbackFlag, 2);
546 Value *Flags = Builder.CreateOr(HasNoWaitFlag, DynCGroupMemFallbackFlag);
548 assert(!KernelArgs.NumTeams.empty() && !KernelArgs.NumThreads.empty());
551 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumTeams[0], {0});
552 Value *NumThreads3D =
553 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads[0], {0});
555 seq<unsigned>(1, std::min(KernelArgs.NumTeams.size(), MaxDim)))
557 Builder.CreateInsertValue(NumTeams3D, KernelArgs.NumTeams[
I], {I});
559 seq<unsigned>(1, std::min(KernelArgs.NumThreads.size(), MaxDim)))
561 Builder.CreateInsertValue(NumThreads3D, KernelArgs.NumThreads[
I], {I});
563 ArgsVector = {Version,
565 KernelArgs.RTArgs.BasePointersArray,
566 KernelArgs.RTArgs.PointersArray,
567 KernelArgs.RTArgs.SizesArray,
568 KernelArgs.RTArgs.MapTypesArray,
569 KernelArgs.RTArgs.MapNamesArray,
570 KernelArgs.RTArgs.MappersArray,
571 KernelArgs.NumIterations,
575 KernelArgs.DynCGroupMem};
583 auto FnAttrs =
Attrs.getFnAttrs();
584 auto RetAttrs =
Attrs.getRetAttrs();
586 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
591 bool Param =
true) ->
void {
592 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
593 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
594 if (HasSignExt || HasZeroExt) {
595 assert(AS.getNumAttributes() == 1 &&
596 "Currently not handling extension attr combined with others.");
598 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
601 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
608#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
609#include "llvm/Frontend/OpenMP/OMPKinds.def"
613#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
615 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
616 addAttrSet(RetAttrs, RetAttrSet, false); \
617 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
618 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
619 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
621#include "llvm/Frontend/OpenMP/OMPKinds.def"
635#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
637 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
639 Fn = M.getFunction(Str); \
641#include "llvm/Frontend/OpenMP/OMPKinds.def"
647#define OMP_RTL(Enum, Str, ...) \
649 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
651#include "llvm/Frontend/OpenMP/OMPKinds.def"
655 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
665 LLVMContext::MD_callback,
667 2, {-1, -1},
true)}));
673 addAttributes(FnID, *Fn);
680 assert(Fn &&
"Failed to create OpenMP runtime function");
686OpenMPIRBuilder::FinalizationInfo::getFiniBB(
IRBuilderBase &Builder) {
691 Builder.SetInsertPoint(FiniBB);
693 if (
Error Err = FiniCB(Builder.saveIP()))
703 FiniBB = OtherFiniBB;
705 Builder.SetInsertPoint(FiniBB->getFirstNonPHIIt());
706 if (
Error Err = FiniCB(Builder.saveIP()))
713 auto EndIt = FiniBB->end();
714 if (FiniBB->size() >= 1)
715 if (
auto Prev = std::prev(EndIt); Prev->isTerminator())
720 FiniBB->replaceAllUsesWith(OtherFiniBB);
721 FiniBB->eraseFromParent();
722 FiniBB = OtherFiniBB;
729 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
736 CallInst *
Call = Builder.CreateCall(Callee, Args, Name);
737 Call->setCallingConv(Config.getRuntimeCC());
741void OpenMPIRBuilder::initialize() { initializeTypes(M); }
752 for (
auto Inst =
Block->getReverseIterator()->begin();
753 Inst !=
Block->getReverseIterator()->end();) {
766void OpenMPIRBuilder::finalize(
Function *Fn) {
770 for (OutlineInfo &OI : OutlineInfos) {
773 if (Fn && OI.getFunction() != Fn) {
778 ParallelRegionBlockSet.
clear();
780 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
790 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
799 ".omp_par", ArgsInZeroAddressSpace);
803 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
804 assert(Extractor.isEligible() &&
805 "Expected OpenMP outlining to be possible!");
807 for (
auto *V : OI.ExcludeArgsFromAggregate)
808 Extractor.excludeArgFromAggregate(V);
810 Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);
814 if (TargetCpuAttr.isStringAttribute())
817 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
818 if (TargetFeaturesAttr.isStringAttribute())
819 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
822 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
824 "OpenMP outlined functions should not return a value!");
829 M.getFunctionList().insertAfter(OuterFn->
getIterator(), OutlinedFn);
836 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
843 "Expected instructions to add in the outlined region entry");
845 End = ArtificialEntry.
rend();
850 if (
I.isTerminator()) {
852 if (OI.EntryBB->getTerminator())
853 OI.EntryBB->getTerminator()->adoptDbgRecords(
854 &ArtificialEntry,
I.getIterator(),
false);
858 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
861 OI.EntryBB->moveBefore(&ArtificialEntry);
868 if (OI.PostOutlineCB)
869 OI.PostOutlineCB(*OutlinedFn);
873 OutlineInfos = std::move(DeferredOutlines);
894 for (
Function *
F : ConstantAllocaRaiseCandidates)
897 EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
898 [](EmitMetadataErrorKind Kind,
899 const TargetRegionEntryInfo &EntryInfo) ->
void {
900 errs() <<
"Error of kind: " << Kind
901 <<
" when emitting offload entries and metadata during "
902 "OMPIRBuilder finalization \n";
905 if (!OffloadInfoManager.empty())
906 createOffloadEntriesAndInfoMetadata(ErrorReportFn);
908 if (Config.EmitLLVMUsedMetaInfo.value_or(
false)) {
909 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
910 M.getGlobalVariable(
"__openmp_nvptx_data_transfer_temporary_storage")};
911 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
917bool OpenMPIRBuilder::isFinalized() {
return IsFinalized; }
919OpenMPIRBuilder::~OpenMPIRBuilder() {
920 assert(OutlineInfos.empty() &&
"There must be no outstanding outlinings");
928 ConstantInt::get(I32Ty,
Value), Name);
940 UsedArray.
resize(List.size());
941 for (
unsigned I = 0,
E = List.size();
I !=
E; ++
I)
945 if (UsedArray.
empty())
952 GV->setSection(
"llvm.metadata");
956OpenMPIRBuilder::emitKernelExecutionMode(
StringRef KernelName,
958 auto *Int8Ty = Builder.getInt8Ty();
961 ConstantInt::get(Int8Ty,
Mode),
Twine(KernelName,
"_exec_mode"));
969 unsigned Reserve2Flags) {
971 LocFlags |= OMP_IDENT_FLAG_KMPC;
974 IdentMap[{SrcLocStr,
uint64_t(LocFlags) << 31 | Reserve2Flags}];
978 ConstantInt::get(Int32,
uint32_t(LocFlags)),
979 ConstantInt::get(Int32, Reserve2Flags),
980 ConstantInt::get(Int32, SrcLocStrSize), SrcLocStr};
982 size_t SrcLocStrArgIdx = 4;
983 if (OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx)
987 SrcLocStr, OpenMPIRBuilder::Ident->getElementType(SrcLocStrArgIdx));
994 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
995 if (
GV.getInitializer() == Initializer)
1000 M, OpenMPIRBuilder::Ident,
1003 M.getDataLayout().getDefaultGlobalsAddressSpace());
1015 SrcLocStrSize = LocStr.
size();
1016 Constant *&SrcLocStr = SrcLocStrMap[LocStr];
1024 if (
GV.isConstant() &&
GV.hasInitializer() &&
1025 GV.getInitializer() == Initializer)
1028 SrcLocStr = Builder.CreateGlobalString(
1029 LocStr,
"", M.getDataLayout().getDefaultGlobalsAddressSpace(),
1037 unsigned Line,
unsigned Column,
1043 Buffer.
append(FunctionName);
1045 Buffer.
append(std::to_string(Line));
1047 Buffer.
append(std::to_string(Column));
1050 return getOrCreateSrcLocStr(Buffer.
str(), SrcLocStrSize);
1054OpenMPIRBuilder::getOrCreateDefaultSrcLocStr(
uint32_t &SrcLocStrSize) {
1055 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
1056 return getOrCreateSrcLocStr(UnknownLoc, SrcLocStrSize);
1064 return getOrCreateDefaultSrcLocStr(SrcLocStrSize);
1066 if (
DIFile *DIF = DIL->getFile())
1067 if (std::optional<StringRef> Source = DIF->getSource())
1072 return getOrCreateSrcLocStr(
Function, FileName, DIL->getLine(),
1073 DIL->getColumn(), SrcLocStrSize);
1076Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(
const LocationDescription &
Loc,
1078 return getOrCreateSrcLocStr(
Loc.DL, SrcLocStrSize,
1079 Loc.IP.getBlock()->getParent());
1082Value *OpenMPIRBuilder::getOrCreateThreadID(
Value *Ident) {
1083 return createRuntimeFunctionCall(
1084 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num), Ident,
1085 "omp_global_thread_num");
1088OpenMPIRBuilder::InsertPointOrErrorTy
1089OpenMPIRBuilder::createBarrier(
const LocationDescription &
Loc,
Directive Kind,
1090 bool ForceSimpleCall,
bool CheckCancelFlag) {
1091 if (!updateToLocation(
Loc))
1100 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
1103 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
1106 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
1109 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
1112 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
1117 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1119 getOrCreateIdent(SrcLocStr, SrcLocStrSize, BarrierLocFlags),
1120 getOrCreateThreadID(getOrCreateIdent(SrcLocStr, SrcLocStrSize))};
1125 bool UseCancelBarrier =
1126 !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);
1129 getOrCreateRuntimeFunctionPtr(UseCancelBarrier
1130 ? OMPRTL___kmpc_cancel_barrier
1131 : OMPRTL___kmpc_barrier),
1134 if (UseCancelBarrier && CheckCancelFlag)
1135 if (
Error Err = emitCancelationCheckImpl(Result, OMPD_parallel))
1138 return Builder.saveIP();
1141OpenMPIRBuilder::InsertPointOrErrorTy
1142OpenMPIRBuilder::createCancel(
const LocationDescription &
Loc,
1144 omp::Directive CanceledDirective) {
1145 if (!updateToLocation(
Loc))
1149 auto *UI = Builder.CreateUnreachable();
1157 Builder.SetInsertPoint(ElseTI);
1158 auto ElseIP = Builder.saveIP();
1160 InsertPointOrErrorTy IPOrErr = createCancellationPoint(
1161 LocationDescription{ElseIP,
Loc.DL}, CanceledDirective);
1166 Builder.SetInsertPoint(ThenTI);
1168 Value *CancelKind =
nullptr;
1169 switch (CanceledDirective) {
1170#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1171 case DirectiveEnum: \
1172 CancelKind = Builder.getInt32(Value); \
1174#include "llvm/Frontend/OpenMP/OMPKinds.def"
1180 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1181 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1182 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1184 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
1187 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective))
1191 Builder.SetInsertPoint(UI->getParent());
1192 UI->eraseFromParent();
1194 return Builder.saveIP();
1197OpenMPIRBuilder::InsertPointOrErrorTy
1198OpenMPIRBuilder::createCancellationPoint(
const LocationDescription &
Loc,
1199 omp::Directive CanceledDirective) {
1200 if (!updateToLocation(
Loc))
1204 auto *UI = Builder.CreateUnreachable();
1205 Builder.SetInsertPoint(UI);
1207 Value *CancelKind =
nullptr;
1208 switch (CanceledDirective) {
1209#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1210 case DirectiveEnum: \
1211 CancelKind = Builder.getInt32(Value); \
1213#include "llvm/Frontend/OpenMP/OMPKinds.def"
1219 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1220 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1221 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
1223 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancellationpoint), Args);
1226 if (
Error Err = emitCancelationCheckImpl(Result, CanceledDirective))
1230 Builder.SetInsertPoint(UI->getParent());
1231 UI->eraseFromParent();
1233 return Builder.saveIP();
1236OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitTargetKernel(
1237 const LocationDescription &
Loc, InsertPointTy AllocaIP,
Value *&Return,
1240 if (!updateToLocation(
Loc))
1243 Builder.restoreIP(AllocaIP);
1244 auto *KernelArgsPtr =
1245 Builder.CreateAlloca(OpenMPIRBuilder::KernelArgs,
nullptr,
"kernel_args");
1246 updateToLocation(
Loc);
1250 Builder.CreateStructGEP(OpenMPIRBuilder::KernelArgs, KernelArgsPtr,
I);
1251 Builder.CreateAlignedStore(
1253 M.getDataLayout().getPrefTypeAlign(KernelArgs[
I]->getType()));
1257 NumThreads, HostPtr, KernelArgsPtr};
1259 Return = createRuntimeFunctionCall(
1260 getOrCreateRuntimeFunction(M, OMPRTL___tgt_target_kernel),
1263 return Builder.saveIP();
1266OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitKernelLaunch(
1267 const LocationDescription &
Loc,
Value *OutlinedFnID,
1268 EmitFallbackCallbackTy EmitTargetCallFallbackCB, TargetKernelArgs &Args,
1269 Value *DeviceID,
Value *RTLoc, InsertPointTy AllocaIP) {
1271 if (!updateToLocation(
Loc))
1284 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1288 Value *Return =
nullptr;
1292 getKernelArgsVector(Args, Builder, ArgsVector);
1307 Builder.restoreIP(emitTargetKernel(
1308 Builder, AllocaIP, Return, RTLoc, DeviceID,
Args.NumTeams.front(),
1309 Args.NumThreads.front(), OutlinedFnID, ArgsVector));
1316 Builder.CreateCondBr(
Failed, OffloadFailedBlock, OffloadContBlock);
1318 auto CurFn = Builder.GetInsertBlock()->getParent();
1319 emitBlock(OffloadFailedBlock, CurFn);
1320 InsertPointOrErrorTy AfterIP = EmitTargetCallFallbackCB(Builder.saveIP());
1322 return AfterIP.takeError();
1323 Builder.restoreIP(*AfterIP);
1324 emitBranch(OffloadContBlock);
1325 emitBlock(OffloadContBlock, CurFn,
true);
1326 return Builder.saveIP();
1329Error OpenMPIRBuilder::emitCancelationCheckImpl(
1330 Value *CancelFlag, omp::Directive CanceledDirective) {
1331 assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
1332 "Unexpected cancellation!");
1337 if (Builder.GetInsertPoint() == BB->
end()) {
1343 NonCancellationBlock =
SplitBlock(BB, &*Builder.GetInsertPoint());
1345 Builder.SetInsertPoint(BB);
1351 Value *
Cmp = Builder.CreateIsNull(CancelFlag);
1352 Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
1357 auto &FI = FinalizationStack.back();
1361 Builder.SetInsertPoint(CancellationBlock);
1362 Builder.CreateBr(*FiniBBOrErr);
1365 Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->
begin());
1384 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1387 "Expected at least tid and bounded tid as arguments");
1388 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1391 assert(CI &&
"Expected call instruction to outlined function");
1392 CI->
getParent()->setName(
"omp_parallel");
1394 Builder.SetInsertPoint(CI);
1395 Type *PtrTy = OMPIRBuilder->VoidPtr;
1399 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.saveIP();
1403 Value *Args = ArgsAlloca;
1407 Args = Builder.CreatePointerCast(ArgsAlloca, PtrTy);
1408 Builder.restoreIP(CurrentIP);
1411 for (
unsigned Idx = 0; Idx < NumCapturedVars; Idx++) {
1413 Value *StoreAddress = Builder.CreateConstInBoundsGEP2_64(
1415 Builder.CreateStore(V, StoreAddress);
1419 IfCondition ? Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32)
1420 : Builder.getInt32(1);
1423 Value *Parallel60CallArgs[] = {
1427 NumThreads ? NumThreads : Builder.getInt32(-1),
1428 Builder.getInt32(-1),
1432 Builder.getInt64(NumCapturedVars),
1433 Builder.getInt32(0)};
1436 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_parallel_60);
1438 OMPIRBuilder->createRuntimeFunctionCall(RTLFn, Parallel60CallArgs);
1441 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1444 Builder.SetInsertPoint(PrivTID);
1446 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1453 I->eraseFromParent();
1470 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call_if);
1473 OMPIRBuilder->getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
1476 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1484 F->addMetadata(LLVMContext::MD_callback,
1493 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1496 "Expected at least tid and bounded tid as arguments");
1497 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1500 CI->
getParent()->setName(
"omp_parallel");
1501 Builder.SetInsertPoint(CI);
1504 Value *ForkCallArgs[] = {Ident, Builder.getInt32(NumCapturedVars),
1508 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1510 Value *
Cond = Builder.CreateSExtOrTrunc(IfCondition, OMPIRBuilder->Int32);
1517 auto PtrTy = OMPIRBuilder->VoidPtr;
1518 if (IfCondition && NumCapturedVars == 0) {
1523 OMPIRBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
1526 << *Builder.GetInsertBlock()->getParent() <<
"\n");
1529 Builder.SetInsertPoint(PrivTID);
1531 Builder.CreateStore(Builder.CreateLoad(OMPIRBuilder->Int32, OutlinedAI),
1538 I->eraseFromParent();
1542OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createParallel(
1543 const LocationDescription &
Loc, InsertPointTy OuterAllocaIP,
1544 BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
1545 FinalizeCallbackTy FiniCB,
Value *IfCondition,
Value *NumThreads,
1546 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1549 if (!updateToLocation(
Loc))
1553 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1554 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1555 Value *ThreadID = getOrCreateThreadID(Ident);
1561 bool ArgsInZeroAddressSpace = Config.isTargetDevice();
1565 if (NumThreads && !Config.isTargetDevice()) {
1568 Builder.CreateIntCast(NumThreads, Int32,
false)};
1569 createRuntimeFunctionCall(
1570 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
1573 if (ProcBind != OMP_PROC_BIND_default) {
1577 ConstantInt::get(Int32,
unsigned(ProcBind),
true)};
1578 createRuntimeFunctionCall(
1579 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
1582 BasicBlock *InsertBB = Builder.GetInsertBlock();
1587 BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();
1595 InsertPointTy NewOuter(OuterAllocaBlock, OuterAllocaBlock->
begin());
1596 Builder.restoreIP(NewOuter);
1597 AllocaInst *TIDAddrAlloca = Builder.CreateAlloca(Int32,
nullptr,
"tid.addr");
1599 Builder.CreateAlloca(Int32,
nullptr,
"zero.addr");
1602 if (ArgsInZeroAddressSpace && M.getDataLayout().getAllocaAddrSpace() != 0) {
1605 TIDAddrAlloca, PointerType ::get(M.getContext(), 0),
"tid.addr.ascast");
1609 PointerType ::get(M.getContext(), 0),
1610 "zero.addr.ascast");
1631 auto FiniCBWrapper = [&](InsertPointTy IP) {
1636 Builder.restoreIP(IP);
1638 IP = InsertPointTy(
I->getParent(),
I->getIterator());
1642 "Unexpected insertion point for finalization call!");
1646 FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});
1651 InsertPointTy InnerAllocaIP = Builder.saveIP();
1654 Builder.CreateAlloca(Int32,
nullptr,
"tid.addr.local");
1655 Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr,
"tid");
1658 ToBeDeleted.
push_back(Builder.CreateLoad(Int32, TIDAddr,
"tid.addr.use"));
1660 Builder.CreateLoad(Int32, ZeroAddr,
"zero.addr.use");
1678 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1681 assert(BodyGenCB &&
"Expected body generation callback!");
1682 InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->
begin());
1683 if (
Error Err = BodyGenCB(InnerAllocaIP, CodeGenIP))
1686 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1689 if (Config.isTargetDevice()) {
1691 OI.PostOutlineCB = [=, ToBeDeletedVec =
1692 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1694 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1695 ThreadID, ToBeDeletedVec);
1699 OI.PostOutlineCB = [=, ToBeDeletedVec =
1700 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1702 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1706 OI.OuterAllocaBB = OuterAllocaBlock;
1707 OI.EntryBB = PRegEntryBB;
1708 OI.ExitBB = PRegExitBB;
1712 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
1723 ".omp_par", ArgsInZeroAddressSpace);
1728 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1730 Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands,
1735 return GV->getValueType() == OpenMPIRBuilder::Ident;
1740 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1743 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);
1746 if (&V == TIDAddr || &V == ZeroAddr) {
1747 OI.ExcludeArgsFromAggregate.push_back(&V);
1752 for (
Use &U : V.uses())
1754 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1764 if (!V.getType()->isPointerTy()) {
1768 Builder.restoreIP(OuterAllocaIP);
1770 Builder.CreateAlloca(V.getType(),
nullptr, V.getName() +
".reloaded");
1774 Builder.SetInsertPoint(InsertBB,
1776 Builder.CreateStore(&V, Ptr);
1779 Builder.restoreIP(InnerAllocaIP);
1780 Inner = Builder.CreateLoad(V.getType(), Ptr);
1783 Value *ReplacementValue =
nullptr;
1786 ReplacementValue = PrivTID;
1788 InsertPointOrErrorTy AfterIP =
1789 PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue);
1791 return AfterIP.takeError();
1792 Builder.restoreIP(*AfterIP);
1794 InnerAllocaIP.getBlock(),
1795 InnerAllocaIP.getBlock()->getTerminator()->getIterator()};
1797 assert(ReplacementValue &&
1798 "Expected copy/create callback to set replacement value!");
1799 if (ReplacementValue == &V)
1804 UPtr->set(ReplacementValue);
1829 for (
Value *Output : Outputs)
1832 assert(Outputs.empty() &&
1833 "OpenMP outlining should not produce live-out values!");
1835 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1837 for (
auto *BB : Blocks)
1844 auto FiniInfo = FinalizationStack.pop_back_val();
1846 assert(FiniInfo.DK == OMPD_parallel &&
1847 "Unexpected finalization stack state!");
1851 InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->
getIterator());
1857 Builder.restoreIP(PreFiniIP);
1858 Builder.CreateBr(*FiniBBOrErr);
1861 if (
Instruction *Term = Builder.GetInsertBlock()->getTerminator())
1862 Term->eraseFromParent();
1866 addOutlineInfo(std::move(OI));
1868 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1869 UI->eraseFromParent();
1874void OpenMPIRBuilder::emitFlush(
const LocationDescription &
Loc) {
1877 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1878 Value *
Args[] = {getOrCreateIdent(SrcLocStr, SrcLocStrSize)};
1880 createRuntimeFunctionCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush),
1884void OpenMPIRBuilder::createFlush(
const LocationDescription &
Loc) {
1885 if (!updateToLocation(
Loc))
1890void OpenMPIRBuilder::emitTaskwaitImpl(
const LocationDescription &
Loc) {
1894 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1895 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1896 Value *
Args[] = {Ident, getOrCreateThreadID(Ident)};
1899 createRuntimeFunctionCall(
1900 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait), Args);
// Public entry for the `taskwait` directive: validates/sets the insertion
// location, then delegates the actual runtime call to emitTaskwaitImpl.
1903void OpenMPIRBuilder::createTaskwait(
const LocationDescription &
Loc) {
1904 if (!updateToLocation(
Loc))
1906 emitTaskwaitImpl(
Loc);
// Emits a call to __kmpc_omp_taskyield(ident, gtid, part_id) at the current
// insertion point. `I32Null` (declared in lines missing from this
// extraction — presumably a zero i32 constant; confirm upstream) is passed
// as the third argument.
1909void OpenMPIRBuilder::emitTaskyieldImpl(
const LocationDescription &
Loc) {
1912 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
1913 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
1915 Value *
Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};
1917 createRuntimeFunctionCall(
1918 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield), Args);
// Public entry for the `taskyield` directive: validates/sets the insertion
// location, then delegates the actual runtime call to emitTaskyieldImpl.
1921void OpenMPIRBuilder::createTaskyield(
const LocationDescription &
Loc) {
1922 if (!updateToLocation(
Loc))
1924 emitTaskyieldImpl(
Loc);
1933 OpenMPIRBuilder &OMPBuilder,
1936 if (Dependencies.
empty())
1956 Type *DependInfo = OMPBuilder.DependInfo;
1957 Module &M = OMPBuilder.M;
1959 Value *DepArray =
nullptr;
1960 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
1961 Builder.SetInsertPoint(
1962 OldIP.getBlock()->getParent()->getEntryBlock().getTerminator());
1965 DepArray = Builder.CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1967 Builder.restoreIP(OldIP);
1969 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1971 Builder.CreateConstInBoundsGEP2_64(DepArrayTy, DepArray, 0, DepIdx);
1973 Value *Addr = Builder.CreateStructGEP(
1975 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1976 Value *DepValPtr = Builder.CreatePtrToInt(Dep.DepVal, Builder.getInt64Ty());
1977 Builder.CreateStore(DepValPtr, Addr);
1980 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1981 Builder.CreateStore(
1982 Builder.getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1985 Value *Flags = Builder.CreateStructGEP(
1987 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1988 Builder.CreateStore(
1989 ConstantInt::get(Builder.getInt8Ty(),
1990 static_cast<unsigned int>(Dep.DepKind)),
// Generates code for the OpenMP `task` construct.
// Visible strategy (NOTE(review): many interior lines are missing from this
// extraction, so comments cover only what is shown):
//  1. Split the current block into task.alloca / task.body / task.exit and
//     run the body-generation callback between the alloca and body points.
//  2. Record an OutlineInfo so the body is outlined into its own function.
//  3. In PostOutlineCB, replace the stale outlined call with the runtime
//     task protocol: __kmpc_omp_task_alloc, optional completion-event and
//     shareds copy, then either the if-clause split path
//     (__kmpc_omp_wait_deps + task_begin_if0/complete_if0 around a direct
//     call) or __kmpc_omp_task[_with_deps].
1996OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTask(
1997 const LocationDescription &
Loc, InsertPointTy AllocaIP,
1998 BodyGenCallbackTy BodyGenCB,
bool Tied,
Value *Final,
Value *IfCondition,
2002 if (!updateToLocation(
Loc))
2003 return InsertPointTy();
2006 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2007 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Carve out the task region: exit, body and alloca blocks.
2024 BasicBlock *TaskExitBB = splitBB(Builder,
true,
"task.exit");
2025 BasicBlock *TaskBodyBB = splitBB(Builder,
true,
"task.body");
2027 splitBB(Builder,
true,
"task.alloca");
2029 InsertPointTy TaskAllocaIP =
2030 InsertPointTy(TaskAllocaBB, TaskAllocaBB->
begin());
2031 InsertPointTy TaskBodyIP = InsertPointTy(TaskBodyBB, TaskBodyBB->
begin());
2032 if (
Error Err = BodyGenCB(TaskAllocaIP, TaskBodyIP))
// Describe the region to be outlined into the task entry function.
2036 OI.EntryBB = TaskAllocaBB;
2037 OI.OuterAllocaBB = AllocaIP.getBlock();
2038 OI.ExitBB = TaskExitBB;
2043 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
// Post-outlining: lower the placeholder call into the kmpc task protocol.
2045 OI.PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
2046 Mergeable, Priority, EventHandle, TaskAllocaBB,
2047 ToBeDeleted](
Function &OutlinedFn)
mutable {
2050 "there must be a single user for the outlined function");
// More than one argument means a shareds struct is passed along.
2055 bool HasShareds = StaleCI->
arg_size() > 1;
2056 Builder.SetInsertPoint(StaleCI);
2061 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc);
2065 Value *ThreadID = getOrCreateThreadID(Ident);
// Task flags: bit 0 = tied; `final` selects 2; 4 and 32 are OR'd in on
// (missing) conditions — presumably mergeable/priority; confirm upstream.
2077 Value *Flags = Builder.getInt32(Tied);
2080 Builder.CreateSelect(Final, Builder.getInt32(2), Builder.getInt32(0));
2081 Flags = Builder.CreateOr(FinalFlag, Flags);
2085 Flags = Builder.CreateOr(Builder.getInt32(4), Flags);
2087 Flags = Builder.CreateOr(Builder.getInt32(32), Flags);
// Size of the kmp_task_t in bytes (bits rounded up to bytes).
2093 Value *TaskSize = Builder.getInt64(
2094 divideCeil(M.getDataLayout().getTypeSizeInBits(Task), 8));
2099 Value *SharedsSize = Builder.getInt64(0);
2103 assert(ArgStructAlloca &&
2104 "Unable to find the alloca instruction corresponding to arguments "
2105 "for extracted function");
2108 assert(ArgStructType &&
"Unable to find struct type corresponding to "
2109 "arguments for extracted function");
2111 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
// Allocate the task descriptor (and shareds space) in the runtime.
2116 CallInst *TaskData = createRuntimeFunctionCall(
2117 TaskAllocFn, {Ident, ThreadID, Flags,
2118 TaskSize, SharedsSize,
// Detachable tasks: obtain a completion event and store it through the
// user-provided event handle.
2125 Function *TaskDetachFn = getOrCreateRuntimeFunctionPtr(
2126 OMPRTL___kmpc_task_allow_completion_event);
2128 createRuntimeFunctionCall(TaskDetachFn, {Ident, ThreadID, TaskData});
2130 Builder.CreatePointerBitCastOrAddrSpaceCast(EventHandle,
2131 Builder.getPtrTy(0));
2132 EventVal = Builder.CreatePtrToInt(EventVal, Builder.getInt64Ty());
2133 Builder.CreateStore(EventVal, EventHandleAddr);
// Copy the captured shareds into the runtime-allocated shareds area.
2139 Value *TaskShareds = Builder.CreateLoad(VoidPtr, TaskData);
2140 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
2158 Builder.CreateInBoundsGEP(TaskPtr, TaskData, {
Zero,
Zero});
// Store the priority clause value into the kmp_cmplrdata_t slot
// (field index 4 of the task struct).
2161 VoidPtr, VoidPtr, Builder.getInt32Ty(), VoidPtr, VoidPtr);
2162 Value *PriorityData = Builder.CreateInBoundsGEP(
2163 TaskStructType, TaskGEP, {
Zero, ConstantInt::get(
Int32Ty, 4)});
2166 Value *CmplrData = Builder.CreateInBoundsGEP(CmplrStructType,
2168 Builder.CreateStore(Priority, CmplrData);
// if-clause: split into then/else; the else path runs the task body
// immediately (undeferred) between task_begin_if0 / task_complete_if0.
2193 splitBB(Builder,
true,
"if.end");
2195 Builder.GetInsertPoint()->
getParent()->getTerminator();
2196 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
2197 Builder.SetInsertPoint(IfTerminator);
2200 Builder.SetInsertPoint(ElseTI);
2202 if (Dependencies.size()) {
2204 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
2205 createRuntimeFunctionCall(
2207 {Ident, ThreadID, Builder.getInt32(Dependencies.size()), DepArray,
2208 ConstantInt::get(Builder.getInt32Ty(), 0),
2212 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
2214 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
2215 createRuntimeFunctionCall(TaskBeginFn, {Ident, ThreadID, TaskData});
2218 CI = createRuntimeFunctionCall(&OutlinedFn, {ThreadID, TaskData});
2220 CI = createRuntimeFunctionCall(&OutlinedFn, {ThreadID});
2222 createRuntimeFunctionCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
2223 Builder.SetInsertPoint(ThenTI);
// Deferred path: enqueue the task, with dependencies if present.
2226 if (Dependencies.size()) {
2228 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
2229 createRuntimeFunctionCall(
2231 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
2232 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
2237 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
2238 createRuntimeFunctionCall(TaskFn, {Ident, ThreadID, TaskData});
// Inside the outlined function: re-load shareds from the second argument
// and redirect all remaining uses to it.
2243 Builder.SetInsertPoint(TaskAllocaBB, TaskAllocaBB->begin());
2245 LoadInst *Shareds = Builder.CreateLoad(VoidPtr, OutlinedFn.
getArg(1));
2247 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2251 I->eraseFromParent();
2254 addOutlineInfo(std::move(OI));
2255 Builder.SetInsertPoint(TaskExitBB, TaskExitBB->
begin());
2257 return Builder.saveIP();
// Generates an OpenMP `taskgroup` region: brackets the user body between
// __kmpc_taskgroup and __kmpc_end_taskgroup calls, with the body emitted
// via the supplied callback and terminated at taskgroup.exit.
2260OpenMPIRBuilder::InsertPointOrErrorTy
2261OpenMPIRBuilder::createTaskgroup(
const LocationDescription &
Loc,
2262 InsertPointTy AllocaIP,
2263 BodyGenCallbackTy BodyGenCB) {
2264 if (!updateToLocation(
Loc))
2265 return InsertPointTy();
2268 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
2269 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
2270 Value *ThreadID = getOrCreateThreadID(Ident);
// Open the taskgroup before generating the body.
2274 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_taskgroup);
2275 createRuntimeFunctionCall(TaskgroupFn, {Ident, ThreadID});
2277 BasicBlock *TaskgroupExitBB = splitBB(Builder,
true,
"taskgroup.exit");
2278 if (
Error Err = BodyGenCB(AllocaIP, Builder.saveIP()))
// Close the taskgroup in the exit block.
2281 Builder.SetInsertPoint(TaskgroupExitBB);
2284 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_taskgroup);
2285 createRuntimeFunctionCall(EndTaskgroupFn, {Ident, ThreadID});
2287 return Builder.saveIP();
// Generates code for the `sections` construct by lowering it to a static
// workshare loop over section indices: the loop body switches on the
// induction variable and dispatches to each section's callback.
// NOTE(review): interior lines are missing from this extraction (e.g. the
// switch creation and the canonical-loop call result), so comments cover
// only the visible statements.
2290OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSections(
2291 const LocationDescription &
Loc, InsertPointTy AllocaIP,
2293 FinalizeCallbackTy FiniCB,
bool IsCancellable,
bool IsNowait) {
2296 if (!updateToLocation(
Loc))
// Push finalization info so cancellation can unwind this region.
2299 FinalizationStack.push_back({FiniCB, OMPD_sections, IsCancellable});
// Loop body: one switch case per registered section callback.
2317 auto LoopBodyGenCB = [&](InsertPointTy CodeGenIP,
Value *IndVar) ->
Error {
2318 Builder.restoreIP(CodeGenIP);
2320 splitBBWithSuffix(Builder,
false,
".sections.after");
2324 unsigned CaseNumber = 0;
2325 for (
auto SectionCB : SectionCBs) {
2327 M.getContext(),
"omp_section_loop.body.case", CurFn,
Continue);
2328 SwitchStmt->
addCase(Builder.getInt32(CaseNumber), CaseBB);
2329 Builder.SetInsertPoint(CaseBB);
2331 if (
Error Err = SectionCB(InsertPointTy(), {CaseEndBr->
getParent(),
// Trip space: [0, #sections) with stride 1, lowered as a static
// workshare loop (barrier at the end unless nowait).
2343 Value *LB = ConstantInt::get(I32Ty, 0);
2344 Value *UB = ConstantInt::get(I32Ty, SectionCBs.size());
2345 Value *
ST = ConstantInt::get(I32Ty, 1);
2347 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2351 InsertPointOrErrorTy WsloopIP =
2352 applyStaticWorkshareLoop(
Loc.DL, *
LoopInfo, AllocaIP,
2353 WorksharingLoopType::ForStaticLoop, !IsNowait);
2355 return WsloopIP.takeError();
2356 InsertPointTy AfterIP = *WsloopIP;
2359 assert(LoopFini &&
"Bad structure of static workshare loop finalization");
// Pop and run the finalization pushed above.
2362 auto FiniInfo = FinalizationStack.pop_back_val();
2363 assert(FiniInfo.DK == OMPD_sections &&
2364 "Unexpected finalization stack state!");
2365 if (
Error Err = FiniInfo.mergeFiniBB(Builder, LoopFini))
// Generates a single `section` inside a `sections` region as an inlined
// region (EmitOMPInlinedRegion). The finalization callback is wrapped so
// the finalization runs at the section's case-end branch rather than at
// the raw insertion point.
2371OpenMPIRBuilder::InsertPointOrErrorTy
2372OpenMPIRBuilder::createSection(
const LocationDescription &
Loc,
2373 BodyGenCallbackTy BodyGenCB,
2374 FinalizeCallbackTy FiniCB) {
2375 if (!updateToLocation(
Loc))
2378 auto FiniCBWrapper = [&](InsertPointTy IP) {
2389 Builder.restoreIP(IP);
// Redirect the finalization point relative to the enclosing case block.
2390 auto *CaseBB =
Loc.IP.getBlock();
2394 IP = InsertPointTy(
I->getParent(),
I->getIterator());
2398 Directive OMPD = Directive::OMPD_sections;
2401 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2409 return OpenMPIRBuilder::InsertPointTy(
I->getParent(),
IT);
// Returns the hardware thread id within the block/team by calling the
// device runtime's __kmpc_get_hardware_thread_id_in_block.
2412Value *OpenMPIRBuilder::getGPUThreadID() {
2413 return createRuntimeFunctionCall(
2414 getOrCreateRuntimeFunction(M,
2415 OMPRTL___kmpc_get_hardware_thread_id_in_block),
// Returns the target's warp/wavefront size via __kmpc_get_warp_size.
2419Value *OpenMPIRBuilder::getGPUWarpSize() {
2420 return createRuntimeFunctionCall(
2421 getOrCreateRuntimeFunction(M, OMPRTL___kmpc_get_warp_size), {});
// Computes the warp id of the current thread: thread id shifted right by
// log2(warp size). Warp size comes from the target grid-value config.
2424Value *OpenMPIRBuilder::getNVPTXWarpID() {
2425 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2426 return Builder.CreateAShr(getGPUThreadID(), LaneIDBits,
"nvptx_warp_id");
// Computes the lane id within the warp: thread id masked with
// (warp_size - 1), where the mask is built from log2(warp size).
2429Value *OpenMPIRBuilder::getNVPTXLaneID() {
2430 unsigned LaneIDBits =
Log2_32(Config.getGridValue().GV_Warp_Size);
2431 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
// Keep only the low LaneIDBits bits of the thread id.
2432 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2433 return Builder.CreateAnd(getGPUThreadID(), Builder.getInt32(LaneIDMask),
// Casts `From` to `ToType`, choosing the cheapest valid mechanism:
// identity if the types match, bitcast if the store sizes match, integer
// cast (sign-extending) where applicable, and otherwise a store/load
// round-trip through a temporary alloca created at `AllocaIP`.
2437Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *From,
2440 uint64_t FromSize = M.getDataLayout().getTypeStoreSize(FromType);
2441 uint64_t ToSize = M.getDataLayout().getTypeStoreSize(ToType);
2442 assert(FromSize > 0 &&
"From size must be greater than zero");
2443 assert(ToSize > 0 &&
"To size must be greater than zero");
2444 if (FromType == ToType)
2446 if (FromSize == ToSize)
2447 return Builder.CreateBitCast(From, ToType);
2449 return Builder.CreateIntCast(From, ToType,
true);
// Fallback: spill to a temporary in the entry/alloca block, then reload
// with the destination type.
2450 InsertPointTy SaveIP = Builder.saveIP();
2451 Builder.restoreIP(AllocaIP);
2452 Value *CastItem = Builder.CreateAlloca(ToType);
2453 Builder.restoreIP(SaveIP);
2455 Value *ValCastItem = Builder.CreatePointerBitCastOrAddrSpaceCast(
2456 CastItem, Builder.getPtrTy(0));
2457 Builder.CreateStore(From, ValCastItem);
2458 return Builder.CreateLoad(ToType, CastItem);
// Shuffles `Element` across lanes via __kmpc_shuffle_int32/int64 (chosen
// by element store size; at most 8 bytes supported). The element is cast
// to the shuffle integer type on the way in and back on the way out.
2461Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2465 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElementType);
2466 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2470 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2472 Builder.CreateIntCast(getGPUWarpSize(), Builder.getInt16Ty(),
true);
// 32-bit shuffle for <=4-byte elements, 64-bit otherwise.
2473 Function *ShuffleFunc = getOrCreateRuntimeFunctionPtr(
2474 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2475 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2476 Value *WarpSizeCast =
2477 Builder.CreateIntCast(WarpSize, Builder.getInt16Ty(),
true);
2478 Value *ShuffleCall =
2479 createRuntimeFunctionCall(ShuffleFunc, {ElemCast,
Offset, WarpSizeCast});
2480 return castValueToType(AllocaIP, ShuffleCall, CastTy);
// Shuffles a value of `ElemType` from a remote lane into `DstAddr`,
// decomposing it into progressively smaller integer chunks (8,4,2,1
// bytes). Multi-chunk elements are processed in a pointer-bounded loop
// (precond/then/exit blocks); single-chunk elements are shuffled directly.
// NOTE(review): interior lines are missing from this extraction (e.g. the
// IntType selection and the PHI incoming edges), so comments cover only
// the visible statements.
2483void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2487 uint64_t Size = M.getDataLayout().getTypeStoreSize(ElemType);
2498 Type *IndexTy = Builder.getIndexTy(
2499 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2500 Value *ElemPtr = DstAddr;
2501 Value *Ptr = SrcAddr;
// Try the largest chunk size first, halving until everything is copied.
2502 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2506 Ptr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2507 Ptr, Builder.getPtrTy(0), Ptr->
getName() +
".ascast");
2509 Builder.CreateGEP(ElemType, SrcAddr, {ConstantInt::get(IndexTy, 1)});
2510 ElemPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2511 ElemPtr, Builder.getPtrTy(0), ElemPtr->
getName() +
".ascast");
// More than one chunk of this size: loop until fewer than IntSize
// bytes remain before the element's end pointer.
2514 if ((
Size / IntSize) > 1) {
2515 Value *PtrEnd = Builder.CreatePointerBitCastOrAddrSpaceCast(
2516 SrcAddrGEP, Builder.getPtrTy());
2521 BasicBlock *CurrentBB = Builder.GetInsertBlock();
2522 emitBlock(PreCondBB, CurFunc);
// PHIs carry the advancing source/destination pointers.
2524 Builder.CreatePHI(Ptr->
getType(), 2);
2527 Builder.CreatePHI(ElemPtr->
getType(), 2);
2531 Value *PtrDiff = Builder.CreatePtrDiff(
2532 Builder.getInt8Ty(), PtrEnd,
2533 Builder.CreatePointerBitCastOrAddrSpaceCast(Ptr, Builder.getPtrTy()));
2534 Builder.CreateCondBr(
2535 Builder.CreateICmpSGT(PtrDiff, Builder.getInt64(IntSize - 1)), ThenBB,
2537 emitBlock(ThenBB, CurFunc);
// Shuffle one chunk and store it at the destination cursor.
2538 Value *Res = createRuntimeShuffleFunction(
2540 Builder.CreateAlignedLoad(
2541 IntType, Ptr, M.getDataLayout().getPrefTypeAlign(ElemType)),
2543 Builder.CreateAlignedStore(Res, ElemPtr,
2544 M.getDataLayout().getPrefTypeAlign(ElemType));
2546 Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2547 Value *LocalElemPtr =
2548 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
2551 emitBranch(PreCondBB);
2552 emitBlock(ExitBB, CurFunc);
// Single chunk: shuffle once, truncate if needed, store, advance.
2554 Value *Res = createRuntimeShuffleFunction(
2555 AllocaIP, Builder.CreateLoad(IntType, Ptr), IntType,
Offset);
2558 Res = Builder.CreateTrunc(Res, ElemType);
2559 Builder.CreateStore(Res, ElemPtr);
2560 Ptr = Builder.CreateGEP(IntType, Ptr, {ConstantInt::get(IndexTy, 1)});
2562 Builder.CreateGEP(IntType, ElemPtr, {ConstantInt::get(IndexTy, 1)});
// Copies each element of a reduction list from `SrcBase` to `DestBase`.
// Two actions are visible: RemoteLaneToThread (allocate a private element,
// shuffle the remote lane's value into it, and repoint the destination
// list entry) and ThreadCopy (copy into the existing destination slot).
// By-ref elements go through RI.DataPtrPtrGen to reach the payload.
// NOTE(review): interior lines are missing from this extraction (e.g. the
// `switch (Action)` header and several closing braces), so comments cover
// only the visible statements.
2568Error OpenMPIRBuilder::emitReductionListCopy(
2569 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2572 Type *IndexTy = Builder.getIndexTy(
2573 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2574 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
// Process every reduction element in the list.
2578 for (
auto En :
enumerate(ReductionInfos)) {
2579 const ReductionInfo &RI = En.value();
2580 Value *SrcElementAddr =
nullptr;
2582 Value *DestElementAddr =
nullptr;
2583 Value *DestElementPtrAddr =
nullptr;
// Whether this element's value comes from a remote-lane shuffle.
2585 bool ShuffleInElement =
false;
// Whether the destination list slot must be updated to a new alloca.
2588 bool UpdateDestListPtr =
false;
// Load the source element pointer from the source list.
2591 Value *SrcElementPtrAddr = Builder.CreateInBoundsGEP(
2592 ReductionArrayTy, SrcBase,
2593 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2594 SrcElementAddr = Builder.CreateLoad(Builder.getPtrTy(), SrcElementPtrAddr);
2598 DestElementPtrAddr = Builder.CreateInBoundsGEP(
2599 ReductionArrayTy, DestBase,
2600 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2601 bool IsByRefElem = (!IsByRef.
empty() && IsByRef[En.index()]);
2603 case CopyAction::RemoteLaneToThread: {
// Allocate a thread-private element in the alloca block to receive
// the remote lane's value.
2604 InsertPointTy CurIP = Builder.saveIP();
2605 Builder.restoreIP(AllocaIP);
2607 Type *DestAllocaType =
2608 IsByRefElem ? RI.ByRefAllocatedType : RI.ElementType;
2609 DestAlloca = Builder.CreateAlloca(DestAllocaType,
nullptr,
2610 ".omp.reduction.element");
2612 M.getDataLayout().getPrefTypeAlign(DestAllocaType));
2613 DestElementAddr = DestAlloca;
2615 Builder.CreateAddrSpaceCast(DestElementAddr, Builder.getPtrTy(),
2616 DestElementAddr->
getName() +
".ascast");
2617 Builder.restoreIP(CurIP);
2618 ShuffleInElement =
true;
2619 UpdateDestListPtr =
true;
2622 case CopyAction::ThreadCopy: {
// Destination already exists; copy into the slot it points to.
2624 Builder.CreateLoad(Builder.getPtrTy(), DestElementPtrAddr);
2631 if (ShuffleInElement) {
2632 Type *ShuffleType = RI.ElementType;
2633 Value *ShuffleSrcAddr = SrcElementAddr;
2634 Value *ShuffleDestAddr = DestElementAddr;
2638 assert(RI.ByRefElementType &&
"Expected by-ref element type to be set");
2639 assert(RI.ByRefAllocatedType &&
2640 "Expected by-ref allocated type to be set");
// By-ref: shuffle the pointed-to payload, staged in local storage.
2645 ShuffleType = RI.ByRefElementType;
2647 InsertPointOrErrorTy GenResult =
2648 RI.DataPtrPtrGen(Builder.saveIP(), ShuffleSrcAddr, ShuffleSrcAddr);
2651 return GenResult.takeError();
2653 ShuffleSrcAddr = Builder.CreateLoad(Builder.getPtrTy(), ShuffleSrcAddr);
2656 InsertPointTy OldIP = Builder.saveIP();
2657 Builder.restoreIP(AllocaIP);
2659 LocalStorage = Builder.CreateAlloca(ShuffleType);
2660 Builder.restoreIP(OldIP);
2661 ShuffleDestAddr = LocalStorage;
// Perform the cross-lane shuffle into the destination storage.
2665 shuffleAndStore(AllocaIP, ShuffleSrcAddr, ShuffleDestAddr, ShuffleType,
2666 RemoteLaneOffset, ReductionArrayTy, IsByRefElem);
// By-ref: wire the staged payload back through the data pointer.
2670 InsertPointOrErrorTy GenResult =
2671 RI.DataPtrPtrGen(Builder.saveIP(),
2672 Builder.CreatePointerBitCastOrAddrSpaceCast(
2673 DestAlloca, Builder.getPtrTy(),
".ascast"),
2677 return GenResult.takeError();
2679 Builder.CreateStore(Builder.CreatePointerBitCastOrAddrSpaceCast(
2680 LocalStorage, Builder.getPtrTy(),
".ascast"),
// Non-shuffle path: plain copy, shaped by the element's eval kind.
2684 switch (RI.EvaluationKind) {
2685 case EvalKind::Scalar: {
2686 Value *Elem = Builder.CreateLoad(RI.ElementType, SrcElementAddr);
2688 Builder.CreateStore(Elem, DestElementAddr);
2691 case EvalKind::Complex: {
// Copy real and imaginary parts field-by-field.
2692 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
2693 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2694 Value *SrcReal = Builder.CreateLoad(
2695 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2696 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
2697 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2698 Value *SrcImg = Builder.CreateLoad(
2699 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2701 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
2702 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2703 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
2704 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2705 Builder.CreateStore(SrcReal, DestRealPtr);
2706 Builder.CreateStore(SrcImg, DestImgPtr);
2709 case EvalKind::Aggregate: {
// Aggregates are copied bytewise by store size.
2710 Value *SizeVal = Builder.getInt64(
2711 M.getDataLayout().getTypeStoreSize(RI.ElementType));
2712 Builder.CreateMemCpy(
2713 DestElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
2714 SrcElementAddr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
// If a fresh element was allocated, repoint the destination list slot.
2726 if (UpdateDestListPtr) {
2727 Value *CastDestAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
2728 DestElementAddr, Builder.getPtrTy(),
2729 DestElementAddr->
getName() +
".ascast");
2730 Builder.CreateStore(CastDestAddr, DestElementPtrAddr);
2740 InsertPointTy SavedIP = Builder.saveIP();
2743 Builder.getVoidTy(), {Builder.getPtrTy(), Builder.getInt32Ty()},
2747 "_omp_reduction_inter_warp_copy_func", &M);
2752 Builder.SetInsertPoint(EntryBB);
2770 "__openmp_nvptx_data_transfer_temporary_storage";
2771 GlobalVariable *TransferMedium = M.getGlobalVariable(TransferMediumName);
2772 unsigned WarpSize = Config.getGridValue().GV_Warp_Size;
2774 if (!TransferMedium) {
2783 Value *GPUThreadID = getGPUThreadID();
2785 Value *LaneID = getNVPTXLaneID();
2787 Value *WarpID = getNVPTXWarpID();
2789 InsertPointTy AllocaIP =
2790 InsertPointTy(Builder.GetInsertBlock(),
2791 Builder.GetInsertBlock()->getFirstInsertionPt());
2794 Builder.restoreIP(AllocaIP);
2795 AllocaInst *ReduceListAlloca = Builder.CreateAlloca(
2796 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2798 Builder.CreateAlloca(Arg1Type,
nullptr, NumWarpsArg->
getName() +
".addr");
2799 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2800 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2801 Value *NumWarpsAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
2802 NumWarpsAlloca, Builder.getPtrTy(0),
2803 NumWarpsAlloca->
getName() +
".ascast");
2804 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
2805 Builder.CreateStore(NumWarpsArg, NumWarpsAddrCast);
2807 InsertPointTy CodeGenIP =
2809 Builder.restoreIP(CodeGenIP);
2812 Builder.CreateLoad(Builder.getPtrTy(), ReduceListAddrCast);
2814 for (
auto En :
enumerate(ReductionInfos)) {
2819 const ReductionInfo &RI = En.value();
2820 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
2821 unsigned RealTySize = M.getDataLayout().getTypeAllocSize(
2822 IsByRefElem ? RI.ByRefElementType : RI.ElementType);
2823 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2826 unsigned NumIters = RealTySize / TySize;
2829 Value *Cnt =
nullptr;
2830 Value *CntAddr =
nullptr;
2834 CodeGenIP = Builder.saveIP();
2835 Builder.restoreIP(AllocaIP);
2837 Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
".cnt.addr");
2839 CntAddr = Builder.CreateAddrSpaceCast(CntAddr, Builder.getPtrTy(),
2840 CntAddr->
getName() +
".ascast");
2841 Builder.restoreIP(CodeGenIP);
2848 emitBlock(PrecondBB, Builder.GetInsertBlock()->getParent());
2849 Cnt = Builder.CreateLoad(Builder.getInt32Ty(), CntAddr,
2851 Value *
Cmp = Builder.CreateICmpULT(
2852 Cnt, ConstantInt::get(Builder.getInt32Ty(), NumIters));
2853 Builder.CreateCondBr(Cmp, BodyBB, ExitBB);
2854 emitBlock(BodyBB, Builder.GetInsertBlock()->getParent());
2858 InsertPointOrErrorTy BarrierIP1 =
2859 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2860 omp::Directive::OMPD_unknown,
2864 return BarrierIP1.takeError();
2870 Value *IsWarpMaster = Builder.CreateIsNull(LaneID,
"warp_master");
2871 Builder.CreateCondBr(IsWarpMaster, ThenBB, ElseBB);
2872 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
2875 auto *RedListArrayTy =
2877 Type *IndexTy = Builder.getIndexTy(
2878 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
2880 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2881 {ConstantInt::get(IndexTy, 0),
2882 ConstantInt::get(IndexTy, En.index())});
2884 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
2887 InsertPointOrErrorTy GenRes =
2888 RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
2891 return GenRes.takeError();
2893 ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtr);
2897 ElemPtr = Builder.CreateGEP(Builder.getInt32Ty(), ElemPtr, Cnt);
2901 Value *MediumPtr = Builder.CreateInBoundsGEP(
2902 ArrayTy, TransferMedium, {Builder.getInt64(0), WarpID});
2905 Value *Elem = Builder.CreateLoad(CType, ElemPtr);
2907 Builder.CreateStore(Elem, MediumPtr,
2909 Builder.CreateBr(MergeBB);
2912 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
2913 Builder.CreateBr(MergeBB);
2916 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
2917 InsertPointOrErrorTy BarrierIP2 =
2918 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
2919 omp::Directive::OMPD_unknown,
2923 return BarrierIP2.takeError();
2930 Value *NumWarpsVal =
2931 Builder.CreateLoad(Builder.getInt32Ty(), NumWarpsAddrCast);
2933 Value *IsActiveThread =
2934 Builder.CreateICmpULT(GPUThreadID, NumWarpsVal,
"is_active_thread");
2935 Builder.CreateCondBr(IsActiveThread, W0ThenBB, W0ElseBB);
2937 emitBlock(W0ThenBB, Builder.GetInsertBlock()->getParent());
2941 Value *SrcMediumPtrVal = Builder.CreateInBoundsGEP(
2942 ArrayTy, TransferMedium, {Builder.getInt64(0), GPUThreadID});
2944 Value *TargetElemPtrPtr =
2945 Builder.CreateInBoundsGEP(RedListArrayTy, ReduceList,
2946 {ConstantInt::get(IndexTy, 0),
2947 ConstantInt::get(IndexTy, En.index())});
2948 Value *TargetElemPtrVal =
2949 Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtrPtr);
2950 Value *TargetElemPtr = TargetElemPtrVal;
2953 InsertPointOrErrorTy GenRes =
2954 RI.DataPtrPtrGen(Builder.saveIP(), TargetElemPtr, TargetElemPtr);
2957 return GenRes.takeError();
2959 TargetElemPtr = Builder.CreateLoad(Builder.getPtrTy(), TargetElemPtr);
2964 Builder.CreateGEP(Builder.getInt32Ty(), TargetElemPtr, Cnt);
2967 Value *SrcMediumValue =
2968 Builder.CreateLoad(CType, SrcMediumPtrVal,
true);
2969 Builder.CreateStore(SrcMediumValue, TargetElemPtr);
2970 Builder.CreateBr(W0MergeBB);
2972 emitBlock(W0ElseBB, Builder.GetInsertBlock()->getParent());
2973 Builder.CreateBr(W0MergeBB);
2975 emitBlock(W0MergeBB, Builder.GetInsertBlock()->getParent());
2978 Cnt = Builder.CreateNSWAdd(
2979 Cnt, ConstantInt::get(Builder.getInt32Ty(), 1));
2980 Builder.CreateStore(Cnt, CntAddr,
false);
2982 auto *CurFn = Builder.GetInsertBlock()->
getParent();
2983 emitBranch(PrecondBB);
2984 emitBlock(ExitBB, CurFn);
2986 RealTySize %= TySize;
2990 Builder.CreateRetVoid();
2991 Builder.restoreIP(SavedIP);
3002 {Builder.getPtrTy(), Builder.getInt16Ty(),
3003 Builder.getInt16Ty(), Builder.getInt16Ty()},
3007 "_omp_reduction_shuffle_and_reduce_func", &M);
3017 Builder.SetInsertPoint(EntryBB);
3028 Type *ReduceListArgType = ReduceListArg->
getType();
3030 Type *LaneIDArgPtrType = Builder.getPtrTy(0);
3031 Value *ReduceListAlloca = Builder.CreateAlloca(
3032 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
3033 Value *LaneIdAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
3034 LaneIDArg->
getName() +
".addr");
3035 Value *RemoteLaneOffsetAlloca = Builder.CreateAlloca(
3036 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
3037 Value *AlgoVerAlloca = Builder.CreateAlloca(LaneIDArgType,
nullptr,
3038 AlgoVerArg->
getName() +
".addr");
3044 Instruction *RemoteReductionListAlloca = Builder.CreateAlloca(
3045 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
3047 Value *ReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3048 ReduceListAlloca, ReduceListArgType,
3049 ReduceListAlloca->
getName() +
".ascast");
3050 Value *LaneIdAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3051 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
3052 Value *RemoteLaneOffsetAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3053 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
3054 RemoteLaneOffsetAlloca->
getName() +
".ascast");
3055 Value *AlgoVerAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3056 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
3057 Value *RemoteListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3058 RemoteReductionListAlloca, Builder.getPtrTy(),
3059 RemoteReductionListAlloca->
getName() +
".ascast");
3061 Builder.CreateStore(ReduceListArg, ReduceListAddrCast);
3062 Builder.CreateStore(LaneIDArg, LaneIdAddrCast);
3063 Builder.CreateStore(RemoteLaneOffsetArg, RemoteLaneOffsetAddrCast);
3064 Builder.CreateStore(AlgoVerArg, AlgoVerAddrCast);
3066 Value *ReduceList = Builder.CreateLoad(ReduceListArgType, ReduceListAddrCast);
3067 Value *LaneId = Builder.CreateLoad(LaneIDArgType, LaneIdAddrCast);
3068 Value *RemoteLaneOffset =
3069 Builder.CreateLoad(LaneIDArgType, RemoteLaneOffsetAddrCast);
3070 Value *AlgoVer = Builder.CreateLoad(LaneIDArgType, AlgoVerAddrCast);
3077 Error EmitRedLsCpRes = emitReductionListCopy(
3078 AllocaIP, CopyAction::RemoteLaneToThread, RedListArrayTy, ReductionInfos,
3079 ReduceList, RemoteListAddrCast, IsByRef,
3080 {RemoteLaneOffset,
nullptr,
nullptr});
3083 return EmitRedLsCpRes;
3106 Value *CondAlgo0 = Builder.CreateIsNull(AlgoVer);
3107 Value *Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3108 Value *LaneComp = Builder.CreateICmpULT(LaneId, RemoteLaneOffset);
3109 Value *CondAlgo1 = Builder.CreateAnd(Algo1, LaneComp);
3110 Value *Algo2 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(2));
3111 Value *LaneIdAnd1 = Builder.CreateAnd(LaneId, Builder.getInt16(1));
3112 Value *LaneIdComp = Builder.CreateIsNull(LaneIdAnd1);
3113 Value *Algo2AndLaneIdComp = Builder.CreateAnd(Algo2, LaneIdComp);
3114 Value *RemoteOffsetComp =
3115 Builder.CreateICmpSGT(RemoteLaneOffset, Builder.getInt16(0));
3116 Value *CondAlgo2 = Builder.CreateAnd(Algo2AndLaneIdComp, RemoteOffsetComp);
3117 Value *CA0OrCA1 = Builder.CreateOr(CondAlgo0, CondAlgo1);
3118 Value *CondReduce = Builder.CreateOr(CA0OrCA1, CondAlgo2);
3124 Builder.CreateCondBr(CondReduce, ThenBB, ElseBB);
3125 emitBlock(ThenBB, Builder.GetInsertBlock()->getParent());
3126 Value *LocalReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3127 ReduceList, Builder.getPtrTy());
3128 Value *RemoteReduceListPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3129 RemoteListAddrCast, Builder.getPtrTy());
3130 createRuntimeFunctionCall(ReduceFn, {LocalReduceListPtr, RemoteReduceListPtr})
3131 ->addFnAttr(Attribute::NoUnwind);
3132 Builder.CreateBr(MergeBB);
3134 emitBlock(ElseBB, Builder.GetInsertBlock()->getParent());
3135 Builder.CreateBr(MergeBB);
3137 emitBlock(MergeBB, Builder.GetInsertBlock()->getParent());
3141 Algo1 = Builder.CreateICmpEQ(AlgoVer, Builder.getInt16(1));
3142 Value *LaneIdGtOffset = Builder.CreateICmpUGE(LaneId, RemoteLaneOffset);
3143 Value *CondCopy = Builder.CreateAnd(Algo1, LaneIdGtOffset);
3148 Builder.CreateCondBr(CondCopy, CpyThenBB, CpyElseBB);
3150 emitBlock(CpyThenBB, Builder.GetInsertBlock()->getParent());
3152 EmitRedLsCpRes = emitReductionListCopy(
3153 AllocaIP, CopyAction::ThreadCopy, RedListArrayTy, ReductionInfos,
3154 RemoteListAddrCast, ReduceList, IsByRef);
3157 return EmitRedLsCpRes;
3159 Builder.CreateBr(CpyMergeBB);
3161 emitBlock(CpyElseBB, Builder.GetInsertBlock()->getParent());
3162 Builder.CreateBr(CpyMergeBB);
3164 emitBlock(CpyMergeBB, Builder.GetInsertBlock()->getParent());
3166 Builder.CreateRetVoid();
3174 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3177 Builder.getVoidTy(),
3178 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3182 "_omp_reduction_list_to_global_copy_func", &M);
3189 Builder.SetInsertPoint(EntryBlock);
3198 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3199 BufferArg->
getName() +
".addr");
3200 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3202 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3203 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3204 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3205 BufferArgAlloca, Builder.getPtrTy(),
3206 BufferArgAlloca->
getName() +
".ascast");
3207 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3208 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3209 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3210 ReduceListArgAlloca, Builder.getPtrTy(),
3211 ReduceListArgAlloca->
getName() +
".ascast");
3213 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3214 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3215 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3217 Value *LocalReduceList =
3218 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3219 Value *BufferArgVal =
3220 Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3221 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3222 Type *IndexTy = Builder.getIndexTy(
3223 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3224 for (
auto En :
enumerate(ReductionInfos)) {
3225 const ReductionInfo &RI = En.value();
3226 auto *RedListArrayTy =
3229 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3230 RedListArrayTy, LocalReduceList,
3231 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3233 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3237 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferArgVal, Idxs);
3238 Value *GlobVal = Builder.CreateConstInBoundsGEP2_32(
3239 ReductionsBufferTy, BufferVD, 0, En.index());
3241 switch (RI.EvaluationKind) {
3242 case EvalKind::Scalar: {
3243 Value *TargetElement;
3245 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3246 TargetElement = Builder.CreateLoad(RI.ElementType, ElemPtr);
3248 InsertPointOrErrorTy GenResult =
3249 RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
3252 return GenResult.takeError();
3254 ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtr);
3255 TargetElement = Builder.CreateLoad(RI.ByRefElementType, ElemPtr);
3258 Builder.CreateStore(TargetElement, GlobVal);
3261 case EvalKind::Complex: {
3262 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3263 RI.ElementType, ElemPtr, 0, 0,
".realp");
3264 Value *SrcReal = Builder.CreateLoad(
3265 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3266 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3267 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3268 Value *SrcImg = Builder.CreateLoad(
3269 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3271 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3272 RI.ElementType, GlobVal, 0, 0,
".realp");
3273 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3274 RI.ElementType, GlobVal, 0, 1,
".imagp");
3275 Builder.CreateStore(SrcReal, DestRealPtr);
3276 Builder.CreateStore(SrcImg, DestImgPtr);
3279 case EvalKind::Aggregate: {
3281 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3282 Builder.CreateMemCpy(
3283 GlobVal, M.getDataLayout().getPrefTypeAlign(RI.ElementType), ElemPtr,
3284 M.getDataLayout().getPrefTypeAlign(RI.ElementType), SizeVal,
false);
3290 Builder.CreateRetVoid();
3291 Builder.restoreIP(OldIP);
3298 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3301 Builder.getVoidTy(),
3302 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3306 "_omp_reduction_list_to_global_reduce_func", &M);
3313 Builder.SetInsertPoint(EntryBlock);
3322 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3323 BufferArg->
getName() +
".addr");
3324 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3326 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3327 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3328 auto *RedListArrayTy =
3333 Value *LocalReduceList =
3334 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3336 InsertPointTy AllocaIP{EntryBlock, EntryBlock->
begin()};
3338 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3339 BufferArgAlloca, Builder.getPtrTy(),
3340 BufferArgAlloca->
getName() +
".ascast");
3341 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3342 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3343 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3344 ReduceListArgAlloca, Builder.getPtrTy(),
3345 ReduceListArgAlloca->
getName() +
".ascast");
3346 Value *LocalReduceListAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3347 LocalReduceList, Builder.getPtrTy(),
3348 LocalReduceList->
getName() +
".ascast");
3350 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3351 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3352 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3354 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3355 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3356 Type *IndexTy = Builder.getIndexTy(
3357 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3358 for (
auto En :
enumerate(ReductionInfos)) {
3359 const ReductionInfo &RI = En.value();
3362 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3363 InsertPointTy OldIP = Builder.saveIP();
3364 Builder.restoreIP(AllocaIP);
3366 ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType);
3367 ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast(
3368 ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3370 Builder.restoreIP(OldIP);
3373 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3374 RedListArrayTy, LocalReduceListAddrCast,
3375 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3377 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3379 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3380 ReductionsBufferTy, BufferVD, 0, En.index());
3382 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3383 Value *ByRefDataPtr;
3385 InsertPointOrErrorTy GenResult =
3386 RI.DataPtrPtrGen(Builder.saveIP(), ByRefAlloc, ByRefDataPtr);
3389 return GenResult.takeError();
3391 Builder.CreateStore(GlobValPtr, ByRefDataPtr);
3392 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
3394 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3400 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3401 createRuntimeFunctionCall(ReduceFn, {LocalReduceListAddrCast, ReduceList})
3402 ->addFnAttr(Attribute::NoUnwind);
3403 Builder.CreateRetVoid();
3404 Builder.restoreIP(OldIP);
3411 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3414 Builder.getVoidTy(),
3415 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3419 "_omp_reduction_global_to_list_copy_func", &M);
3426 Builder.SetInsertPoint(EntryBlock);
3435 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3436 BufferArg->
getName() +
".addr");
3437 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3439 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3440 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3441 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3442 BufferArgAlloca, Builder.getPtrTy(),
3443 BufferArgAlloca->
getName() +
".ascast");
3444 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3445 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3446 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3447 ReduceListArgAlloca, Builder.getPtrTy(),
3448 ReduceListArgAlloca->
getName() +
".ascast");
3449 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3450 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3451 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3453 Value *LocalReduceList =
3454 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3455 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3456 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3457 Type *IndexTy = Builder.getIndexTy(
3458 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3459 for (
auto En :
enumerate(ReductionInfos)) {
3460 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
3461 auto *RedListArrayTy =
3464 Value *ElemPtrPtr = Builder.CreateInBoundsGEP(
3465 RedListArrayTy, LocalReduceList,
3466 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3468 Value *ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtrPtr);
3471 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3472 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3473 ReductionsBufferTy, BufferVD, 0, En.index());
3475 switch (RI.EvaluationKind) {
3476 case EvalKind::Scalar: {
3477 Type *ElemType = RI.ElementType;
3479 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3480 ElemType = RI.ByRefElementType;
3481 InsertPointOrErrorTy GenResult =
3482 RI.DataPtrPtrGen(Builder.saveIP(), ElemPtr, ElemPtr);
3485 return GenResult.takeError();
3487 ElemPtr = Builder.CreateLoad(Builder.getPtrTy(), ElemPtr);
3490 Value *TargetElement = Builder.CreateLoad(ElemType, GlobValPtr);
3491 Builder.CreateStore(TargetElement, ElemPtr);
3494 case EvalKind::Complex: {
3495 Value *SrcRealPtr = Builder.CreateConstInBoundsGEP2_32(
3496 RI.ElementType, GlobValPtr, 0, 0,
".realp");
3497 Value *SrcReal = Builder.CreateLoad(
3498 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
3499 Value *SrcImgPtr = Builder.CreateConstInBoundsGEP2_32(
3500 RI.ElementType, GlobValPtr, 0, 1,
".imagp");
3501 Value *SrcImg = Builder.CreateLoad(
3502 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
3504 Value *DestRealPtr = Builder.CreateConstInBoundsGEP2_32(
3505 RI.ElementType, ElemPtr, 0, 0,
".realp");
3506 Value *DestImgPtr = Builder.CreateConstInBoundsGEP2_32(
3507 RI.ElementType, ElemPtr, 0, 1,
".imagp");
3508 Builder.CreateStore(SrcReal, DestRealPtr);
3509 Builder.CreateStore(SrcImg, DestImgPtr);
3512 case EvalKind::Aggregate: {
3514 Builder.getInt64(M.getDataLayout().getTypeStoreSize(RI.ElementType));
3515 Builder.CreateMemCpy(
3516 ElemPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3517 GlobValPtr, M.getDataLayout().getPrefTypeAlign(RI.ElementType),
3524 Builder.CreateRetVoid();
3525 Builder.restoreIP(OldIP);
3532 OpenMPIRBuilder::InsertPointTy OldIP = Builder.saveIP();
3535 Builder.getVoidTy(),
3536 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3540 "_omp_reduction_global_to_list_reduce_func", &M);
3547 Builder.SetInsertPoint(EntryBlock);
3556 Value *BufferArgAlloca = Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
3557 BufferArg->
getName() +
".addr");
3558 Value *IdxArgAlloca = Builder.CreateAlloca(Builder.getInt32Ty(),
nullptr,
3560 Value *ReduceListArgAlloca = Builder.CreateAlloca(
3561 Builder.getPtrTy(),
nullptr, ReduceListArg->
getName() +
".addr");
3567 Value *LocalReduceList =
3568 Builder.CreateAlloca(RedListArrayTy,
nullptr,
".omp.reduction.red_list");
3570 InsertPointTy AllocaIP{EntryBlock, EntryBlock->
begin()};
3572 Value *BufferArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3573 BufferArgAlloca, Builder.getPtrTy(),
3574 BufferArgAlloca->
getName() +
".ascast");
3575 Value *IdxArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3576 IdxArgAlloca, Builder.getPtrTy(), IdxArgAlloca->
getName() +
".ascast");
3577 Value *ReduceListArgAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3578 ReduceListArgAlloca, Builder.getPtrTy(),
3579 ReduceListArgAlloca->
getName() +
".ascast");
3580 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3581 LocalReduceList, Builder.getPtrTy(),
3582 LocalReduceList->
getName() +
".ascast");
3584 Builder.CreateStore(BufferArg, BufferArgAddrCast);
3585 Builder.CreateStore(IdxArg, IdxArgAddrCast);
3586 Builder.CreateStore(ReduceListArg, ReduceListArgAddrCast);
3588 Value *BufferVal = Builder.CreateLoad(Builder.getPtrTy(), BufferArgAddrCast);
3589 Value *Idxs[] = {Builder.CreateLoad(Builder.getInt32Ty(), IdxArgAddrCast)};
3590 Type *IndexTy = Builder.getIndexTy(
3591 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3592 for (
auto En :
enumerate(ReductionInfos)) {
3593 const ReductionInfo &RI = En.value();
3596 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3597 InsertPointTy OldIP = Builder.saveIP();
3598 Builder.restoreIP(AllocaIP);
3600 ByRefAlloc = Builder.CreateAlloca(RI.ByRefAllocatedType);
3601 ByRefAlloc = Builder.CreatePointerBitCastOrAddrSpaceCast(
3602 ByRefAlloc, Builder.getPtrTy(), ByRefAlloc->
getName() +
".ascast");
3604 Builder.restoreIP(OldIP);
3607 Value *TargetElementPtrPtr = Builder.CreateInBoundsGEP(
3608 RedListArrayTy, ReductionList,
3609 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3612 Builder.CreateInBoundsGEP(ReductionsBufferTy, BufferVal, Idxs);
3613 Value *GlobValPtr = Builder.CreateConstInBoundsGEP2_32(
3614 ReductionsBufferTy, BufferVD, 0, En.index());
3616 if (!IsByRef.
empty() && IsByRef[En.index()]) {
3617 Value *ByRefDataPtr;
3618 InsertPointOrErrorTy GenResult =
3619 RI.DataPtrPtrGen(Builder.saveIP(), ByRefAlloc, ByRefDataPtr);
3621 return GenResult.takeError();
3623 Builder.CreateStore(GlobValPtr, ByRefDataPtr);
3624 Builder.CreateStore(ByRefAlloc, TargetElementPtrPtr);
3626 Builder.CreateStore(GlobValPtr, TargetElementPtrPtr);
3632 Builder.CreateLoad(Builder.getPtrTy(), ReduceListArgAddrCast);
3633 createRuntimeFunctionCall(ReduceFn, {ReduceList, ReductionList})
3634 ->addFnAttr(Attribute::NoUnwind);
3635 Builder.CreateRetVoid();
3636 Builder.restoreIP(OldIP);
// Build the mangled name for an outlined reduction function: \p Name plus a
// platform-specific suffix assembled from {"omp", "reduction",
// "reduction_func"} (separator chosen by createPlatformSpecificName).
// NOTE(review): this extract elides surrounding lines (incl. the closing
// brace); the visible tokens below are kept byte-identical.
3640std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3641 std::string Suffix =
3642 createPlatformSpecificName({
"omp",
"reduction",
"reduction_func"});
// Twine concatenation materialized into an owned std::string for the caller.
3643 return (Name + Suffix).
str();
3649 AttributeList FuncAttrs) {
3651 {Builder.getPtrTy(), Builder.getPtrTy()},
3653 std::string
Name = getReductionFuncName(ReducerName);
3661 Builder.SetInsertPoint(EntryBB);
3665 Value *LHSArrayPtr =
nullptr;
3666 Value *RHSArrayPtr =
nullptr;
3673 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
3675 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
3676 Value *LHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3677 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3678 Value *RHSAddrCast = Builder.CreatePointerBitCastOrAddrSpaceCast(
3679 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3680 Builder.CreateStore(Arg0, LHSAddrCast);
3681 Builder.CreateStore(Arg1, RHSAddrCast);
3682 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
3683 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
3686 Type *IndexTy = Builder.getIndexTy(
3687 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3689 for (
auto En :
enumerate(ReductionInfos)) {
3690 const ReductionInfo &RI = En.value();
3691 Value *RHSI8PtrPtr = Builder.CreateInBoundsGEP(
3692 RedArrayTy, RHSArrayPtr,
3693 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3694 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
3695 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3696 RHSI8Ptr, RI.PrivateVariable->getType(),
3697 RHSI8Ptr->
getName() +
".ascast");
3699 Value *LHSI8PtrPtr = Builder.CreateInBoundsGEP(
3700 RedArrayTy, LHSArrayPtr,
3701 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3702 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
3703 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
3704 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3706 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3713 if (!IsByRef.
empty() && !IsByRef[En.index()]) {
3714 LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
3715 RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
3719 InsertPointOrErrorTy AfterIP =
3720 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
3722 return AfterIP.takeError();
3723 if (!Builder.GetInsertBlock())
3724 return ReductionFunc;
3726 Builder.restoreIP(*AfterIP);
3728 if (!IsByRef.
empty() && !IsByRef[En.index()])
3729 Builder.CreateStore(Reduced, LHSPtr);
3733 if (ReductionGenCBKind == ReductionGenCBKind::Clang)
3734 for (
auto En :
enumerate(ReductionInfos)) {
3735 unsigned Index = En.index();
3736 const ReductionInfo &RI = En.value();
3737 Value *LHSFixupPtr, *RHSFixupPtr;
3738 Builder.restoreIP(RI.ReductionGenClang(
3739 Builder.saveIP(), Index, &LHSFixupPtr, &RHSFixupPtr, ReductionFunc));
3744 LHSPtrs[Index], [ReductionFunc](
const Use &U) {
3749 RHSPtrs[Index], [ReductionFunc](
const Use &U) {
3755 Builder.CreateRetVoid();
3756 return ReductionFunc;
3762 for (
const OpenMPIRBuilder::ReductionInfo &RI : ReductionInfos) {
3764 assert(RI.Variable &&
"expected non-null variable");
3765 assert(RI.PrivateVariable &&
"expected non-null private variable");
3766 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3767 "expected non-null reduction generator callback");
3770 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3771 "expected variables and their private equivalents to have the same "
3774 assert(RI.Variable->getType()->isPointerTy() &&
3775 "expected variables to be pointers");
3779OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductionsGPU(
3780 const LocationDescription &
Loc, InsertPointTy AllocaIP,
3783 ReductionGenCBKind ReductionGenCBKind, std::optional<omp::GV> GridValue,
3784 unsigned ReductionBufNum,
Value *SrcLocInfo) {
3785 if (!updateToLocation(
Loc))
3786 return InsertPointTy();
3787 Builder.restoreIP(CodeGenIP);
3794 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
3795 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
3798 if (ReductionInfos.
size() == 0)
3799 return Builder.saveIP();
3802 if (ReductionGenCBKind != ReductionGenCBKind::Clang) {
3808 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
3812 AttributeList FuncAttrs;
3813 AttrBuilder AttrBldr(Ctx);
3815 AttrBldr.addAttribute(Attr);
3816 AttrBldr.removeAttribute(Attribute::OptimizeNone);
3817 FuncAttrs = FuncAttrs.addFnAttributes(Ctx, AttrBldr);
3819 CodeGenIP = Builder.saveIP();
3821 Builder.GetInsertBlock()->getParent()->getName(), ReductionInfos, IsByRef,
3822 ReductionGenCBKind, FuncAttrs);
3823 if (!ReductionResult)
3825 Function *ReductionFunc = *ReductionResult;
3826 Builder.restoreIP(CodeGenIP);
3829 if (GridValue.has_value())
3830 Config.setGridValue(GridValue.value());
3845 Builder.getPtrTy(M.getDataLayout().getProgramAddressSpace());
3847 CodeGenIP = Builder.saveIP();
3848 Builder.restoreIP(AllocaIP);
3849 Value *ReductionListAlloca =
3850 Builder.CreateAlloca(RedArrayTy,
nullptr,
".omp.reduction.red_list");
3851 Value *ReductionList = Builder.CreatePointerBitCastOrAddrSpaceCast(
3852 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3853 Builder.restoreIP(CodeGenIP);
3854 Type *IndexTy = Builder.getIndexTy(
3855 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
3856 for (
auto En :
enumerate(ReductionInfos)) {
3857 const ReductionInfo &RI = En.value();
3858 Value *ElemPtr = Builder.CreateInBoundsGEP(
3859 RedArrayTy, ReductionList,
3860 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3862 Value *PrivateVar = RI.PrivateVariable;
3863 bool IsByRefElem = !IsByRef.
empty() && IsByRef[En.index()];
3865 PrivateVar = Builder.CreateLoad(RI.ElementType, PrivateVar);
3868 Builder.CreatePointerBitCastOrAddrSpaceCast(PrivateVar, PtrTy);
3869 Builder.CreateStore(CastElem, ElemPtr);
3871 CodeGenIP = Builder.saveIP();
3873 ReductionInfos, ReductionFunc, FuncAttrs, IsByRef);
3879 emitInterWarpCopyFunction(
Loc, ReductionInfos, FuncAttrs, IsByRef);
3883 Builder.restoreIP(CodeGenIP);
3885 Value *RL = Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList, PtrTy);
3887 unsigned MaxDataSize = 0;
3889 for (
auto En :
enumerate(ReductionInfos)) {
3890 auto Size = M.getDataLayout().getTypeStoreSize(En.value().ElementType);
3891 if (
Size > MaxDataSize)
3893 Type *RedTypeArg = (!IsByRef.
empty() && IsByRef[En.index()])
3894 ? En.value().ByRefElementType
3895 : En.value().ElementType;
3898 Value *ReductionDataSize =
3899 Builder.getInt64(MaxDataSize * ReductionInfos.
size());
3900 if (!IsTeamsReduction) {
3901 Value *SarFuncCast =
3902 Builder.CreatePointerBitCastOrAddrSpaceCast(*SarFunc, FuncPtrTy);
3904 Builder.CreatePointerBitCastOrAddrSpaceCast(WcFunc, FuncPtrTy);
3905 Value *
Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3907 Function *Pv2Ptr = getOrCreateRuntimeFunctionPtr(
3908 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3909 Res = createRuntimeFunctionCall(Pv2Ptr, Args);
3911 CodeGenIP = Builder.saveIP();
3913 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3914 Function *RedFixedBufferFn = getOrCreateRuntimeFunctionPtr(
3915 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3918 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
3923 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
3928 ReductionInfos, ReductionsBufferTy, FuncAttrs, IsByRef);
3933 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs, IsByRef);
3937 Builder.restoreIP(CodeGenIP);
3939 Value *KernelTeamsReductionPtr = createRuntimeFunctionCall(
3940 RedFixedBufferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3942 Value *Args3[] = {SrcLocInfo,
3943 KernelTeamsReductionPtr,
3944 Builder.getInt32(ReductionBufNum),
3954 Function *TeamsReduceFn = getOrCreateRuntimeFunctionPtr(
3955 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3956 Res = createRuntimeFunctionCall(TeamsReduceFn, Args3);
3962 Value *
Cond = Builder.CreateICmpEQ(Res, Builder.getInt32(1));
3963 Builder.CreateCondBr(
Cond, ThenBB, ExitBB);
3969 emitBlock(ThenBB, CurFunc);
3972 for (
auto En :
enumerate(ReductionInfos)) {
3973 const ReductionInfo &RI = En.value();
3975 Value *RedValue = RI.Variable;
3977 Builder.CreatePointerBitCastOrAddrSpaceCast(RI.PrivateVariable, PtrTy);
3979 if (ReductionGenCBKind == ReductionGenCBKind::Clang) {
3980 Value *LHSPtr, *RHSPtr;
3981 Builder.restoreIP(RI.ReductionGenClang(Builder.saveIP(), En.index(),
3982 &LHSPtr, &RHSPtr, CurFunc));
3995 if (IsByRef.
empty() || !IsByRef[En.index()]) {
3996 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
3997 "red.value." +
Twine(En.index()));
3999 Value *PrivateRedValue = Builder.CreateLoad(
4002 InsertPointOrErrorTy AfterIP =
4003 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
4005 return AfterIP.takeError();
4006 Builder.restoreIP(*AfterIP);
4008 if (!IsByRef.
empty() && !IsByRef[En.index()])
4009 Builder.CreateStore(Reduced, RI.Variable);
4012 emitBlock(ExitBB, CurFunc);
4013 if (ContinuationBlock) {
4014 Builder.CreateBr(ContinuationBlock);
4015 Builder.SetInsertPoint(ContinuationBlock);
4017 Config.setEmitLLVMUsed();
4019 return Builder.saveIP();
4028 ".omp.reduction.func", &M);
4038 Builder.SetInsertPoint(ReductionFuncBlock);
4039 Value *LHSArrayPtr =
nullptr;
4040 Value *RHSArrayPtr =
nullptr;
4051 Builder.CreateAlloca(Arg0Type,
nullptr, Arg0->
getName() +
".addr");
4053 Builder.CreateAlloca(Arg1Type,
nullptr, Arg1->
getName() +
".addr");
4054 Value *LHSAddrCast =
4055 Builder.CreatePointerBitCastOrAddrSpaceCast(LHSAlloca, Arg0Type);
4056 Value *RHSAddrCast =
4057 Builder.CreatePointerBitCastOrAddrSpaceCast(RHSAlloca, Arg1Type);
4058 Builder.CreateStore(Arg0, LHSAddrCast);
4059 Builder.CreateStore(Arg1, RHSAddrCast);
4060 LHSArrayPtr = Builder.CreateLoad(Arg0Type, LHSAddrCast);
4061 RHSArrayPtr = Builder.CreateLoad(Arg1Type, RHSAddrCast);
4063 LHSArrayPtr = ReductionFunc->
getArg(0);
4064 RHSArrayPtr = ReductionFunc->
getArg(1);
4067 unsigned NumReductions = ReductionInfos.
size();
4070 for (
auto En :
enumerate(ReductionInfos)) {
4071 const OpenMPIRBuilder::ReductionInfo &RI = En.value();
4072 Value *LHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4073 RedArrayTy, LHSArrayPtr, 0, En.index());
4074 Value *LHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), LHSI8PtrPtr);
4075 Value *LHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4076 LHSI8Ptr, RI.Variable->
getType());
4077 Value *
LHS = Builder.CreateLoad(RI.ElementType, LHSPtr);
4078 Value *RHSI8PtrPtr = Builder.CreateConstInBoundsGEP2_64(
4079 RedArrayTy, RHSArrayPtr, 0, En.index());
4080 Value *RHSI8Ptr = Builder.CreateLoad(Builder.getPtrTy(), RHSI8PtrPtr);
4081 Value *RHSPtr = Builder.CreatePointerBitCastOrAddrSpaceCast(
4082 RHSI8Ptr, RI.PrivateVariable->
getType());
4083 Value *
RHS = Builder.CreateLoad(RI.ElementType, RHSPtr);
4085 OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4086 RI.ReductionGen(Builder.saveIP(),
LHS,
RHS, Reduced);
4088 return AfterIP.takeError();
4090 Builder.restoreIP(*AfterIP);
4092 if (!Builder.GetInsertBlock())
4096 if (!IsByRef[En.index()])
4097 Builder.CreateStore(Reduced, LHSPtr);
4099 Builder.CreateRetVoid();
4103OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createReductions(
4104 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4106 bool IsNoWait,
bool IsTeamsReduction) {
4109 return createReductionsGPU(
Loc, AllocaIP, Builder.saveIP(), ReductionInfos,
4110 IsByRef, IsNoWait, IsTeamsReduction);
4114 if (!updateToLocation(
Loc))
4115 return InsertPointTy();
4117 if (ReductionInfos.
size() == 0)
4118 return Builder.saveIP();
4127 unsigned NumReductions = ReductionInfos.
size();
4129 Builder.SetInsertPoint(AllocaIP.getBlock()->getTerminator());
4130 Value *RedArray = Builder.CreateAlloca(RedArrayTy,
nullptr,
"red.array");
4132 Builder.SetInsertPoint(InsertBlock, InsertBlock->
end());
4134 for (
auto En :
enumerate(ReductionInfos)) {
4135 unsigned Index = En.index();
4136 const ReductionInfo &RI = En.value();
4137 Value *RedArrayElemPtr = Builder.CreateConstInBoundsGEP2_64(
4138 RedArrayTy, RedArray, 0, Index,
"red.array.elem." +
Twine(Index));
4139 Builder.CreateStore(RI.PrivateVariable, RedArrayElemPtr);
4144 Type *IndexTy = Builder.getIndexTy(
4145 M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
4146 Function *
Func = Builder.GetInsertBlock()->getParent();
4149 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4150 bool CanGenerateAtomic =
all_of(ReductionInfos, [](
const ReductionInfo &RI) {
4151 return RI.AtomicReductionGen;
4153 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize,
4155 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
4157 Value *ThreadId = getOrCreateThreadID(Ident);
4158 Constant *NumVariables = Builder.getInt32(NumReductions);
4160 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
4161 Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
4163 Value *Lock = getOMPCriticalRegionLock(
".reduction");
4164 Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
4165 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
4166 : RuntimeFunction::OMPRTL___kmpc_reduce);
4168 createRuntimeFunctionCall(ReduceFunc,
4169 {Ident, ThreadId, NumVariables, RedArraySize,
4170 RedArray, ReductionFunc, Lock},
4181 Builder.CreateSwitch(ReduceCall, ContinuationBlock, 2);
4182 Switch->addCase(Builder.getInt32(1), NonAtomicRedBlock);
4183 Switch->addCase(Builder.getInt32(2), AtomicRedBlock);
4188 Builder.SetInsertPoint(NonAtomicRedBlock);
4189 for (
auto En :
enumerate(ReductionInfos)) {
4190 const ReductionInfo &RI = En.value();
4194 Value *RedValue = RI.Variable;
4195 if (!IsByRef[En.index()]) {
4196 RedValue = Builder.CreateLoad(
ValueType, RI.Variable,
4197 "red.value." +
Twine(En.index()));
4199 Value *PrivateRedValue =
4200 Builder.CreateLoad(
ValueType, RI.PrivateVariable,
4201 "red.private.value." +
Twine(En.index()));
4203 InsertPointOrErrorTy AfterIP =
4204 RI.ReductionGen(Builder.saveIP(), RedValue, PrivateRedValue, Reduced);
4206 return AfterIP.takeError();
4207 Builder.restoreIP(*AfterIP);
4209 if (!Builder.GetInsertBlock())
4210 return InsertPointTy();
4212 if (!IsByRef[En.index()])
4213 Builder.CreateStore(Reduced, RI.Variable);
4215 Function *EndReduceFunc = getOrCreateRuntimeFunctionPtr(
4216 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
4217 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
4218 createRuntimeFunctionCall(EndReduceFunc, {Ident, ThreadId, Lock});
4219 Builder.CreateBr(ContinuationBlock);
4224 Builder.SetInsertPoint(AtomicRedBlock);
4225 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
4226 for (
const ReductionInfo &RI : ReductionInfos) {
4227 InsertPointOrErrorTy AfterIP = RI.AtomicReductionGen(
4228 Builder.saveIP(), RI.ElementType, RI.Variable, RI.PrivateVariable);
4230 return AfterIP.takeError();
4231 Builder.restoreIP(*AfterIP);
4232 if (!Builder.GetInsertBlock())
4233 return InsertPointTy();
4235 Builder.CreateBr(ContinuationBlock);
4237 Builder.CreateUnreachable();
4248 if (!Builder.GetInsertBlock())
4249 return InsertPointTy();
4251 Builder.SetInsertPoint(ContinuationBlock);
4252 return Builder.saveIP();
// Emit an OpenMP 'master' construct: wraps the region produced by BodyGenCB
// between __kmpc_master / __kmpc_end_master runtime calls so only the
// master thread executes it; FiniCB runs region finalization.
// NOTE(review): several lines are elided in this extract (the early-return
// after updateToLocation, the Args initializer, and the trailing
// EmitOMPInlinedRegion arguments); visible tokens kept byte-identical.
4255OpenMPIRBuilder::InsertPointOrErrorTy
4256OpenMPIRBuilder::createMaster(
const LocationDescription &
Loc,
4257 BodyGenCallbackTy BodyGenCB,
4258 FinalizeCallbackTy FiniCB) {
// Bail out when the location is invalid (elided return follows in the
// original source).
4259 if (!updateToLocation(
Loc))
4262 Directive OMPD = Directive::OMPD_master;
// Source-location ident and thread id are the common leading arguments of
// the kmpc entry/exit calls below.
4264 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4265 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4266 Value *ThreadId = getOrCreateThreadID(Ident);
// Entry/exit guards: __kmpc_master returns whether this thread is the
// master; __kmpc_end_master closes the region.
4269 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master);
4270 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
4272 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master);
4273 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
// Delegate region construction (conditional branch on EntryCall, body,
// finalization, exit call) to the shared inlined-region emitter.
4275 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
// Emit an OpenMP 'masked' construct: like 'master' but guarded by a filter
// thread id; wraps the region between __kmpc_masked / __kmpc_end_masked.
// NOTE(review): this extract elides the Filter parameter line, the
// early-return after updateToLocation, the entry Args initializer (which
// presumably includes the filter — confirm against full source), and the
// trailing EmitOMPInlinedRegion arguments; visible tokens kept
// byte-identical.
4279OpenMPIRBuilder::InsertPointOrErrorTy
4280OpenMPIRBuilder::createMasked(
const LocationDescription &
Loc,
4281 BodyGenCallbackTy BodyGenCB,
// Invalid location: early return (elided in this extract).
4283 if (!updateToLocation(
Loc))
4286 Directive OMPD = Directive::OMPD_masked;
// Common kmpc arguments: source-location ident and current thread id.
4288 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
4289 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4290 Value *ThreadId = getOrCreateThreadID(Ident);
// Exit call takes only {Ident, ThreadId}; the entry call's Args (elided
// initializer) differs — hence the separate ArgsEnd array.
4292 Value *ArgsEnd[] = {Ident, ThreadId};
4294 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked);
4295 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
4297 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked);
4298 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, ArgsEnd);
// Shared emitter builds the guarded region around EntryCall/ExitCall.
4300 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
4310 Call->setDoesNotThrow();
// Emit the code that realizes an OpenMP 'scan' directive inside a scan loop.
// On the first (input) pass, each thread's private scan variables are copied
// into the per-iteration temporary buffers; on the second (scan) pass the
// values are read back out of the buffers. The final branch steers control
// to the before/after-scan blocks depending on inclusive vs. exclusive scan.
// NOTE(review): early returns, error checks and some declarations (IV, CmpI,
// SrcPtr) are elided in this extraction.
4322OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createScan(
4323 const LocationDescription &
Loc, InsertPointTy AllocaIP,
4325 bool IsInclusive, ScanInfo *ScanRedInfo) {
// Allocate buffer pointers the first time we see the scan loop.
4326 if (ScanRedInfo->OMPFirstScanLoop) {
4327 llvm::Error Err = emitScanBasedDirectiveDeclsIR(AllocaIP, ScanVars,
4328 ScanVarsType, ScanRedInfo);
4332 if (!updateToLocation(
Loc))
// Input phase: store each private scan variable into its buffer at index IV.
4337 if (ScanRedInfo->OMPFirstScanLoop) {
4339 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4340 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4341 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4342 Type *DestTy = ScanVarsType[i];
4343 Value *Val = Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4344 Value *Src = Builder.CreateLoad(DestTy, ScanVars[i]);
4346 Builder.CreateStore(Src, Val);
4349 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4350 emitBlock(ScanRedInfo->OMPScanDispatch,
4351 Builder.GetInsertBlock()->getParent());
// Scan phase: load the (already reduced) values back from the buffers.
4353 if (!ScanRedInfo->OMPFirstScanLoop) {
4354 IV = ScanRedInfo->IV;
4357 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4358 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]];
4359 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4360 Type *DestTy = ScanVarsType[i];
4362 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4363 Value *Src = Builder.CreateLoad(DestTy, SrcPtr);
4364 Builder.CreateStore(Src, ScanVars[i]);
// Branch order depends on whether this pass matches the inclusivity of the
// scan: inclusive scans run the "before" block first on the input pass.
4370 if (ScanRedInfo->OMPFirstScanLoop == IsInclusive) {
4371 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPBeforeScanBlock,
4372 ScanRedInfo->OMPAfterScanBlock);
4374 Builder.CreateCondBr(CmpI, ScanRedInfo->OMPAfterScanBlock,
4375 ScanRedInfo->OMPBeforeScanBlock);
4377 emitBlock(ScanRedInfo->OMPAfterScanBlock,
4378 Builder.GetInsertBlock()->getParent());
4379 Builder.SetInsertPoint(ScanRedInfo->OMPAfterScanBlock);
4380 return Builder.saveIP();
// Allocate the per-scan-variable buffer pointers (at the alloca IP) and, on
// the master thread only (via createMasked + barrier), malloc the temporary
// buffers of length Span+1 that hold per-iteration values for the scan.
// NOTE(review): BuffPtr/AllocSpan/IntPtrTy/Allocsize declarations, error
// checks and the FiniCB/FilterVal setup are elided in this extraction.
4383Error OpenMPIRBuilder::emitScanBasedDirectiveDeclsIR(
4387 Builder.restoreIP(AllocaIP);
// One pointer slot per scan variable, created in the entry alloca block.
4389 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4391 Builder.CreateAlloca(Builder.getPtrTy(),
nullptr,
"vla");
4392 (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]] = BuffPtr;
// Body executed under 'masked': allocate the actual buffers on the heap.
4396 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4397 InsertPointTy CodeGenIP) ->
Error {
4398 Builder.restoreIP(CodeGenIP);
// Span + 1 elements: room for the exclusive-scan base element.
4400 Builder.CreateAdd(ScanRedInfo->Span, Builder.getInt32(1));
4401 for (
size_t i = 0; i < ScanVars.
size(); i++) {
4405 Value *Buff = Builder.CreateMalloc(IntPtrTy, ScanVarsType[i], Allocsize,
4406 AllocSpan,
nullptr,
"arr");
4407 Builder.CreateStore(Buff, (*(ScanRedInfo->ScanBuffPtrs))[ScanVars[i]]);
// Run the allocation under a masked region emitted at the scan-init block.
4415 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit->getTerminator());
4417 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4418 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4421 return AfterIP.takeError();
4422 Builder.restoreIP(*AfterIP);
4423 BasicBlock *InputBB = Builder.GetInsertBlock();
// All threads must wait until the buffers exist before using them.
4425 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4426 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4428 return AfterIP.takeError();
4429 Builder.restoreIP(*AfterIP);
// Finalization for a scan-based directive: on the master thread, copy the
// final reduced value (buffer element at index Span) back into the original
// reduction variable and free the temporary buffer, then barrier.
// NOTE(review): FiniCB/FilterVal declarations, error checks and the else
// branch layout are elided in this extraction.
4434Error OpenMPIRBuilder::emitScanBasedDirectiveFinalsIR(
4436 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4437 InsertPointTy CodeGenIP) ->
Error {
4438 Builder.restoreIP(CodeGenIP);
4439 for (ReductionInfo RedInfo : ReductionInfos) {
4440 Value *PrivateVar = RedInfo.PrivateVariable;
4441 Value *OrigVar = RedInfo.Variable;
4442 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[PrivateVar];
4443 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
// The element at index Span holds the final reduction result.
4445 Type *SrcTy = RedInfo.ElementType;
4446 Value *Val = Builder.CreateInBoundsGEP(SrcTy, Buff, ScanRedInfo->Span,
4448 Value *Src = Builder.CreateLoad(SrcTy, Val);
4450 Builder.CreateStore(Src, OrigVar);
4451 Builder.CreateFree(Buff);
// Insert before an existing terminator if the finish block already has one.
4459 if (ScanRedInfo->OMPScanFinish->getTerminator())
4460 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish->getTerminator());
4462 Builder.SetInsertPoint(ScanRedInfo->OMPScanFinish);
// Run the copy-back/free under 'masked', then synchronize all threads.
4465 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4466 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4469 return AfterIP.takeError();
4470 Builder.restoreIP(*AfterIP);
4471 BasicBlock *InputBB = Builder.GetInsertBlock();
4473 Builder.SetInsertPoint(Builder.GetInsertBlock()->getTerminator());
4474 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4476 return AfterIP.takeError();
4477 Builder.restoreIP(*AfterIP);
// Emit the cross-iteration reduction sweep over the scan buffers.
// Under a 'masked' region, runs ceil(log2(Span)) outer passes; each pass k
// combines buffer[IV] with buffer[IV - 2^k] via the user ReductionGen, which
// is the standard parallel-prefix (up-sweep) pattern. Finishes with a barrier
// and the finalization IR.
// NOTE(review): many declarations (InputBB, LoopBB, ExitBB, InnerLoopBB,
// InnerExitBB, CurFn, LogVal, NMin1, Result, Cmp, FiniCB, FilterVal) and
// error checks are elided in this extraction.
4481OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitScanReduction(
4482 const LocationDescription &
Loc,
4484 ScanInfo *ScanRedInfo) {
4486 if (!updateToLocation(
Loc))
4488 auto BodyGenCB = [&](InsertPointTy AllocaIP,
4489 InsertPointTy CodeGenIP) ->
Error {
4490 Builder.restoreIP(CodeGenIP);
4496 splitBB(Builder,
false,
"omp.outer.log.scan.exit");
// Compute the number of outer passes as ceil(log2(Span)) via fp intrinsics
// (presumably log2/ceil declared on the module — elided here).
4498 Builder.GetInsertBlock()->getModule(),
4502 Builder.CreateUIToFP(ScanRedInfo->Span, Builder.getDoubleTy());
4505 Builder.GetInsertBlock()->getModule(),
4508 LogVal = Builder.CreateFPToUI(LogVal, Builder.getInt32Ty());
4511 llvm::ConstantInt::get(ScanRedInfo->Span->getType(), 1));
4512 Builder.SetInsertPoint(InputBB);
4513 Builder.CreateBr(LoopBB);
4514 emitBlock(LoopBB, CurFn);
4515 Builder.SetInsertPoint(LoopBB);
// Outer loop: Counter counts passes, Pow2K holds 2^k (the combine stride).
4517 PHINode *Counter = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4519 PHINode *Pow2K = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4520 Counter->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 0),
4522 Pow2K->
addIncoming(llvm::ConstantInt::get(Builder.getInt32Ty(), 1),
4530 llvm::Value *CmpI = Builder.CreateICmpUGE(NMin1, Pow2K);
4531 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4532 emitBlock(InnerLoopBB, CurFn);
4533 Builder.SetInsertPoint(InnerLoopBB);
// Inner loop: walk IVal downward, combining element IV with IV - Pow2K.
4534 PHINode *IVal = Builder.CreatePHI(Builder.getInt32Ty(), 2);
4536 for (ReductionInfo RedInfo : ReductionInfos) {
4537 Value *ReductionVal = RedInfo.PrivateVariable;
4538 Value *BuffPtr = (*(ScanRedInfo->ScanBuffPtrs))[ReductionVal];
4539 Value *Buff = Builder.CreateLoad(Builder.getPtrTy(), BuffPtr);
4540 Type *DestTy = RedInfo.ElementType;
4541 Value *
IV = Builder.CreateAdd(IVal, Builder.getInt32(1));
4543 Builder.CreateInBoundsGEP(DestTy, Buff,
IV,
"arrayOffset");
4544 Value *OffsetIval = Builder.CreateNUWSub(
IV, Pow2K);
4546 Builder.CreateInBoundsGEP(DestTy, Buff, OffsetIval,
"arrayOffset");
4547 Value *
LHS = Builder.CreateLoad(DestTy, LHSPtr);
4548 Value *
RHS = Builder.CreateLoad(DestTy, RHSPtr);
// Let the user-supplied reduction combine the two elements into Result.
4550 InsertPointOrErrorTy AfterIP =
4551 RedInfo.ReductionGen(Builder.saveIP(),
LHS,
RHS, Result);
4553 return AfterIP.takeError();
4554 Builder.CreateStore(Result, LHSPtr);
4557 IVal, llvm::ConstantInt::get(Builder.getInt32Ty(), 1));
4558 IVal->
addIncoming(NextIVal, Builder.GetInsertBlock());
4559 CmpI = Builder.CreateICmpUGE(NextIVal, Pow2K);
4560 Builder.CreateCondBr(CmpI, InnerLoopBB, InnerExitBB);
4561 emitBlock(InnerExitBB, CurFn);
4563 Counter, llvm::ConstantInt::get(Counter->
getType(), 1));
// Next pass doubles the stride: Pow2K <<= 1 (nuw).
4566 llvm::Value *NextPow2K = Builder.CreateShl(Pow2K, 1,
"",
true);
4567 Pow2K->
addIncoming(NextPow2K, Builder.GetInsertBlock());
4569 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
// The whole sweep runs under 'masked' and is followed by a barrier so all
// threads see the reduced buffers; then emit the copy-back finalization.
4579 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
4580 createMasked(Builder.saveIP(), BodyGenCB, FiniCB, FilterVal);
4583 return AfterIP.takeError();
4584 Builder.restoreIP(*AfterIP);
4585 AfterIP = createBarrier(Builder.saveIP(), llvm::omp::OMPD_barrier);
4588 return AfterIP.takeError();
4589 Builder.restoreIP(*AfterIP);
4590 Error Err = emitScanBasedDirectiveFinalsIR(ReductionInfos, ScanRedInfo);
// Drive the two-pass lowering of a scan-based directive: run the generator
// for the input loop with OMPFirstScanLoop set, then the generator for the
// scan loop with it cleared. Error propagation between the calls is elided
// in this extraction.
4597Error OpenMPIRBuilder::emitScanBasedDirectiveIR(
4600 ScanInfo *ScanRedInfo) {
// Pass 1: the input loop stores privates into the scan buffers.
4608 ScanRedInfo->OMPFirstScanLoop =
true;
4609 Error Err = InputLoopGen();
// Pass 2: the scan loop reads the prefix results back.
4619 ScanRedInfo->OMPFirstScanLoop =
false;
4620 Error Err = ScanLoopGen(Builder.saveIP());
// Create the helper basic blocks used by scan lowering (dispatch, before/
// after-scan, and loop-exit) in the current function and record them in
// ScanRedInfo. The BasicBlock::Create calls themselves are elided in this
// extraction; only the assignments remain visible.
4627void OpenMPIRBuilder::createScanBBs(ScanInfo *ScanRedInfo) {
4628 Function *
Fun = Builder.GetInsertBlock()->getParent();
4629 ScanRedInfo->OMPScanDispatch =
4631 ScanRedInfo->OMPAfterScanBlock =
4633 ScanRedInfo->OMPBeforeScanBlock =
4635 ScanRedInfo->OMPScanLoopExit =
// Build the CFG skeleton of a canonical loop:
//   Preheader -> Header(phi iv) -> Cond(iv < TripCount) -> Body -> Latch
//   -> Header, with Cond also branching to Exit -> After.
// Returns a CanonicalLoopInfo describing the created blocks. The parameter
// list and the block-creation lines are elided in this extraction.
4638CanonicalLoopInfo *OpenMPIRBuilder::createLoopSkeleton(
4662 Builder.SetCurrentDebugLocation(
DL);
4664 Builder.SetInsertPoint(Preheader);
4665 Builder.CreateBr(Header);
// Header: induction variable phi, starting at 0 from the preheader.
4667 Builder.SetInsertPoint(Header);
4668 PHINode *IndVarPHI = Builder.CreatePHI(IndVarTy, 2,
"omp_" + Name +
".iv");
4669 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
4670 Builder.CreateBr(
Cond);
// Cond: unsigned compare against the trip count selects Body or Exit.
4672 Builder.SetInsertPoint(
Cond);
4674 Builder.CreateICmpULT(IndVarPHI, TripCount,
"omp_" + Name +
".cmp");
4675 Builder.CreateCondBr(Cmp, Body, Exit);
4677 Builder.SetInsertPoint(Body);
4678 Builder.CreateBr(Latch);
// Latch: iv.next = iv + 1 (nuw per the trailing 'true'), back edge to Header.
4680 Builder.SetInsertPoint(Latch);
4681 Value *
Next = Builder.CreateAdd(IndVarPHI, ConstantInt::get(IndVarTy, 1),
4682 "omp_" + Name +
".next",
true);
4683 Builder.CreateBr(Header);
4686 Builder.SetInsertPoint(Exit);
4687 Builder.CreateBr(After);
// Record the skeleton in a stable CanonicalLoopInfo slot.
4690 LoopInfos.emplace_front();
4691 CanonicalLoopInfo *CL = &LoopInfos.front();
4693 CL->Header = Header;
// Create a canonical loop with the given trip count and invoke BodyGenCB at
// the body insertion point with the induction variable. Splices the loop
// into the current position when the location is valid. Declarations of
// BB/NextBB/After and the return are elided in this extraction.
4705OpenMPIRBuilder::createCanonicalLoop(
const LocationDescription &
Loc,
4706 LoopBodyGenCallbackTy BodyGenCB,
4711 CanonicalLoopInfo *CL = createLoopSkeleton(
Loc.DL, TripCount, BB->
getParent(),
4712 NextBB, NextBB, Name);
// Wire the skeleton into the current insertion point, if one is set.
4716 if (updateToLocation(
Loc)) {
4720 spliceBB(Builder, After,
false);
4721 Builder.CreateBr(CL->getPreheader());
// Let the caller populate the loop body; propagate its error (elided).
4726 if (
Error Err = BodyGenCB(CL->getBodyIP(), CL->getIndVar()))
4736 ScanInfos.emplace_front();
4737 ScanInfo *
Result = &ScanInfos.front();
// Create the pair of canonical loops needed for an inscan reduction: an
// input loop and a scan loop sharing one body generator. The body generator
// inserts the scan dispatch/before/after/exit blocks around the user body;
// emitScanBasedDirectiveIR then runs both passes.
// NOTE(review): Terminator/ContinueBlock/LoopInfo declarations and error
// checks inside the lambdas are elided in this extraction.
4742OpenMPIRBuilder::createCanonicalScanLoops(
4743 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4744 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4745 InsertPointTy ComputeIP,
const Twine &Name, ScanInfo *ScanRedInfo) {
4746 LocationDescription ComputeLoc =
4747 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4748 updateToLocation(ComputeLoc);
// The trip count doubles as the scan span (number of buffered iterations).
4752 Value *TripCount = calculateCanonicalLoopTripCount(
4753 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4754 ScanRedInfo->Span = TripCount;
4755 ScanRedInfo->OMPScanInit = splitBB(Builder,
true,
"scan.init");
4756 Builder.SetInsertPoint(ScanRedInfo->OMPScanInit);
// Shared body generator: route control through the scan helper blocks and
// then invoke the user body in the before-scan block.
4758 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4759 Builder.restoreIP(CodeGenIP);
4760 ScanRedInfo->IV =
IV;
4761 createScanBBs(ScanRedInfo);
4762 BasicBlock *InputBlock = Builder.GetInsertBlock();
4766 Terminator->setSuccessor(0, ScanRedInfo->OMPScanDispatch);
4767 emitBlock(ScanRedInfo->OMPBeforeScanBlock,
4768 Builder.GetInsertBlock()->getParent());
4769 Builder.CreateBr(ScanRedInfo->OMPScanLoopExit);
4770 emitBlock(ScanRedInfo->OMPScanLoopExit,
4771 Builder.GetInsertBlock()->getParent());
4772 Builder.CreateBr(ContinueBlock);
4773 Builder.SetInsertPoint(
4774 ScanRedInfo->OMPBeforeScanBlock->getFirstInsertionPt());
4775 return BodyGenCB(Builder.saveIP(),
IV);
// Pass 1 generator: the input loop.
4778 const auto &&InputLoopGen = [&]() ->
Error {
4780 Builder.saveIP(), BodyGen, Start, Stop, Step, IsSigned, InclusiveStop,
4781 ComputeIP, Name,
true, ScanRedInfo);
4785 Builder.restoreIP((*LoopInfo)->getAfterIP());
// Pass 2 generator: the scan loop; records where scan finalization goes.
4788 const auto &&ScanLoopGen = [&](LocationDescription
Loc) ->
Error {
4790 createCanonicalLoop(
Loc, BodyGen, Start, Stop, Step, IsSigned,
4791 InclusiveStop, ComputeIP, Name,
true, ScanRedInfo);
4795 Builder.restoreIP((*LoopInfo)->getAfterIP());
4796 ScanRedInfo->OMPScanFinish = Builder.GetInsertBlock();
4799 Error Err = emitScanBasedDirectiveIR(InputLoopGen, ScanLoopGen, ScanRedInfo);
// Compute the trip count of a Start/Stop/Step loop as an unsigned value of
// the induction-variable type. The signed path normalizes a possibly
// negative step by swapping bounds; both paths guard against the empty loop
// with a zero-compare select. Declarations of Zero/One/Span/Incr/ZeroCmp and
// the if/else structure lines are elided in this extraction.
4805Value *OpenMPIRBuilder::calculateCanonicalLoopTripCount(
4807 bool IsSigned,
bool InclusiveStop,
const Twine &Name) {
4817 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
4818 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
4820 updateToLocation(
Loc);
// Signed case: fold a negative step into |Step| with swapped bounds.
4837 Value *IsNeg = Builder.CreateICmpSLT(Step, Zero);
4838 Incr = Builder.CreateSelect(IsNeg, Builder.CreateNeg(Step), Step);
4839 Value *LB = Builder.CreateSelect(IsNeg, Stop, Start);
4840 Value *UB = Builder.CreateSelect(IsNeg, Start, Stop);
4841 Span = Builder.CreateSub(UB, LB,
"",
false,
true);
4842 ZeroCmp = Builder.CreateICmp(
// Unsigned case: Span = Stop - Start directly.
4845 Span = Builder.CreateSub(Stop, Start,
"",
true);
4846 ZeroCmp = Builder.CreateICmp(
4850 Value *CountIfLooping;
4851 if (InclusiveStop) {
4852 CountIfLooping = Builder.CreateAdd(Builder.CreateUDiv(Span, Incr), One);
// Exclusive stop: (Span - 1) / Incr + 1, with a select guarding Span == 1.
4855 Value *CountIfTwo = Builder.CreateAdd(
4856 Builder.CreateUDiv(Builder.CreateSub(Span, One), Incr), One);
4858 CountIfLooping = Builder.CreateSelect(OneCmp, One, CountIfTwo);
// Empty loops get a trip count of zero.
4861 return Builder.CreateSelect(ZeroCmp, Zero, CountIfLooping,
4862 "omp_" + Name +
".tripcount");
4866 const LocationDescription &
Loc, LoopBodyGenCallbackTy BodyGenCB,
4867 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
4868 InsertPointTy ComputeIP,
const Twine &Name,
bool InScan,
4869 ScanInfo *ScanRedInfo) {
4870 LocationDescription ComputeLoc =
4871 ComputeIP.isSet() ? LocationDescription(ComputeIP,
Loc.DL) :
Loc;
4873 Value *TripCount = calculateCanonicalLoopTripCount(
4874 ComputeLoc, Start, Stop, Step, IsSigned, InclusiveStop, Name);
4876 auto BodyGen = [=](InsertPointTy CodeGenIP,
Value *
IV) {
4877 Builder.restoreIP(CodeGenIP);
4878 Value *Span = Builder.CreateMul(
IV, Step);
4879 Value *IndVar = Builder.CreateAdd(Span, Start);
4881 ScanRedInfo->IV = IndVar;
4882 return BodyGenCB(Builder.saveIP(), IndVar);
4884 LocationDescription LoopLoc =
4887 : LocationDescription(Builder.saveIP(),
4888 Builder.getCurrentDebugLocation());
4889 return createCanonicalLoop(LoopLoc, BodyGen, TripCount, Name);
4898 OpenMPIRBuilder &OMPBuilder) {
4899 unsigned Bitwidth = Ty->getIntegerBitWidth();
4901 return OMPBuilder.getOrCreateRuntimeFunction(
4902 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4904 return OMPBuilder.getOrCreateRuntimeFunction(
4905 M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4914 OpenMPIRBuilder &OMPBuilder) {
4915 unsigned Bitwidth = Ty->getIntegerBitWidth();
4917 return OMPBuilder.getOrCreateRuntimeFunction(
4918 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
4920 return OMPBuilder.getOrCreateRuntimeFunction(
4921 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
// Lower a canonical loop to a statically-scheduled worksharing loop:
// allocate the lastiter/lb/ub/stride slots, call __kmpc_*_for_static_init
// (twice when a dist_schedule is combined with a 'for'), rebase the loop on
// the thread's [lb, ub] chunk, and close with __kmpc_for_static_fini plus an
// optional barrier. Several declarations (Zero, I32Type, M, StaticInit,
// SchedulingType, OldIV, HasDistSchedule, Args) and the trailing CFG fixups
// are elided in this extraction.
4925OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
4926 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4929 assert(CLI->isValid() &&
"Requires a valid canonical loop");
4931 "Require dedicated allocate IP");
4934 Builder.restoreIP(CLI->getPreheaderIP());
4935 Builder.SetCurrentDebugLocation(
DL);
4938 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
4939 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
4943 Type *IVTy =
IV->getType();
// Distribute-for loops use the dist variant of the init runtime call.
4945 LoopType == WorksharingLoopType::DistributeForStaticLoop
4949 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
// Runtime out-parameters, allocated in the dedicated alloca block.
4952 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
4955 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
4956 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
4957 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
4958 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
4959 CLI->setLastIter(PLastIter);
// Seed lb=0, ub=tripcount-1 (inclusive), stride=1 before the init call.
4965 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
4967 Constant *One = ConstantInt::get(IVTy, 1);
4968 Builder.CreateStore(Zero, PLowerBound);
4969 Value *UpperBound = Builder.CreateSub(CLI->getTripCount(), One);
4970 Builder.CreateStore(UpperBound, PUpperBound);
4971 Builder.CreateStore(One, PStride);
4973 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
4976 (LoopType == WorksharingLoopType::DistributeStaticLoop)
4977 ? OMPScheduleType::OrderedDistribute
4980 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
// Helper that issues one static-init runtime call; dist-for loops pass an
// extra dist-upper-bound out-parameter.
4984 auto BuildInitCall = [LoopType, SrcLoc, ThreadNum, PLastIter, PLowerBound,
4985 PUpperBound, IVTy, PStride, One,
Zero, StaticInit,
4986 this](
Value *SchedulingType,
auto &Builder) {
4988 PLowerBound, PUpperBound});
4989 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4990 Value *PDistUpperBound =
4991 Builder.CreateAlloca(IVTy,
nullptr,
"p.distupperbound");
4992 Args.push_back(PDistUpperBound);
4995 createRuntimeFunctionCall(StaticInit, Args);
4997 BuildInitCall(SchedulingType, Builder);
// Combined dist_schedule on a non-distribute loop needs a second init call.
4998 if (HasDistSchedule &&
4999 LoopType != WorksharingLoopType::DistributeStaticLoop) {
5000 Constant *DistScheduleSchedType = ConstantInt::get(
5005 BuildInitCall(DistScheduleSchedType, Builder);
// Recompute the thread-local trip count from the runtime-provided bounds.
5007 Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
5008 Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
5009 Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
5010 Value *TripCount = Builder.CreateAdd(TripCountMinusOne, One);
5011 CLI->setTripCount(TripCount);
// Rebase the body IV: effective index = old IV + thread lower bound.
5018 Builder.SetInsertPoint(CLI->getBody(),
5019 CLI->getBody()->getFirstInsertionPt());
5020 Builder.SetCurrentDebugLocation(
DL);
5021 return Builder.CreateAdd(OldIV, LowerBound);
// Tell the runtime the static loop is done, then optionally barrier.
5025 Builder.SetInsertPoint(CLI->getExit(),
5026 CLI->getExit()->getTerminator()->getIterator());
5027 createRuntimeFunctionCall(StaticFini, {SrcLoc, ThreadNum});
5031 InsertPointOrErrorTy BarrierIP =
5032 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5033 omp::Directive::OMPD_for,
false,
5036 return BarrierIP.takeError();
5039 InsertPointTy AfterIP = CLI->getAfterIP();
5061 if (
Block == CLI->getCond() ||
Block == CLI->getHeader())
5063 Reachable.insert(
Block);
5073 Ctx, {
MDString::get(Ctx,
"llvm.loop.parallel_accesses"), AccessGroup}));
// Lower a canonical loop to a static-chunked worksharing loop: widen the IV
// to a 32/64-bit internal type, call the chunked static-init runtime (twice
// when a dist_schedule chunk is also present), then wrap the original loop
// in an outer dispatch loop that strides over chunks and rebases the inner
// loop's trip count and IV per chunk. Finishes with static-fini and an
// optional barrier.
// NOTE(review): numerous declarations (IV, I32Type, Zero, M, SchedType,
// DistScheduleSchedType, SchedulingType, DistSchedulingType, ChunkRange,
// StaticInit) and several control-flow lines are elided in this extraction.
5076OpenMPIRBuilder::InsertPointOrErrorTy
5077OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
5078 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5081 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5082 assert((ChunkSize || DistScheduleChunkSize) &&
"Chunk size is required");
5084 LLVMContext &Ctx = CLI->getFunction()->getContext();
5086 Value *OrigTripCount = CLI->getTripCount();
5087 Type *IVTy =
IV->getType();
5089 "Max supported tripcount bitwidth is 64 bits");
// Internal computations run in i32 or i64 depending on the IV width.
5091 :
Type::getInt64Ty(Ctx);
5094 Constant *One = ConstantInt::get(InternalIVTy, 1);
5104 if (ChunkSize || DistScheduleChunkSize)
5112 getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_for_static_fini);
// Runtime out-parameters in the caller-provided alloca position.
5115 Builder.restoreIP(AllocaIP);
5116 Builder.SetCurrentDebugLocation(
DL);
5117 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5118 Value *PLowerBound =
5119 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.lowerbound");
5120 Value *PUpperBound =
5121 Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.upperbound");
5122 Value *PStride = Builder.CreateAlloca(InternalIVTy,
nullptr,
"p.stride");
5123 CLI->setLastIter(PLastIter);
// Normalize chunk sizes and trip count to the internal width.
5126 Builder.restoreIP(CLI->getPreheaderIP());
5127 Builder.SetCurrentDebugLocation(
DL);
5130 Value *CastedChunkSize = Builder.CreateZExtOrTrunc(
5131 ChunkSize ? ChunkSize : Zero, InternalIVTy,
"chunksize");
5132 Value *CastedDistScheduleChunkSize = Builder.CreateZExtOrTrunc(
5133 DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
5134 "distschedulechunksize");
5135 Value *CastedTripCount =
5136 Builder.CreateZExt(OrigTripCount, InternalIVTy,
"tripcount");
5139 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5141 ConstantInt::get(I32Type,
static_cast<int>(DistScheduleSchedType));
// Seed lb=0, ub=max(tripcount-1, 0), stride=1 for the init call.
5142 Builder.CreateStore(Zero, PLowerBound);
5143 Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
5144 Value *IsTripCountZero = Builder.CreateICmpEQ(CastedTripCount, Zero);
5146 Builder.CreateSelect(IsTripCountZero, Zero, OrigUpperBound);
5147 Builder.CreateStore(UpperBound, PUpperBound);
5148 Builder.CreateStore(One, PStride);
5153 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5154 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5155 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
// Helper issuing one chunked static-init call with a given schedule/chunk.
5156 auto BuildInitCall = [StaticInit, SrcLoc, ThreadNum, PLastIter, PLowerBound,
5157 PUpperBound, PStride, One,
5158 this](
Value *SchedulingType,
Value *ChunkSize,
5160 createRuntimeFunctionCall(
5161 StaticInit, {SrcLoc, ThreadNum,
5162 SchedulingType, PLastIter,
5163 PLowerBound, PUpperBound,
5167 BuildInitCall(SchedulingType, CastedChunkSize, Builder);
// Second init call when a dist_schedule chunk accompanies a 'for' schedule.
5168 if (DistScheduleSchedType != OMPScheduleType::None &&
5169 SchedType != OMPScheduleType::OrderedDistributeChunked &&
5170 SchedType != OMPScheduleType::OrderedDistribute) {
5174 BuildInitCall(DistSchedulingType, CastedDistScheduleChunkSize, Builder);
// First chunk bounds returned by the runtime define the chunk range and the
// stride between successive chunks for this thread.
5178 Value *FirstChunkStart =
5179 Builder.CreateLoad(InternalIVTy, PLowerBound,
"omp_firstchunk.lb");
5180 Value *FirstChunkStop =
5181 Builder.CreateLoad(InternalIVTy, PUpperBound,
"omp_firstchunk.ub");
5182 Value *FirstChunkEnd = Builder.CreateAdd(FirstChunkStop, One);
5184 Builder.CreateSub(FirstChunkEnd, FirstChunkStart,
"omp_chunk.range");
5185 Value *NextChunkStride =
5186 Builder.CreateLoad(InternalIVTy, PStride,
"omp_dispatch.stride");
// Outer dispatch loop: iterate chunk start positions for this thread.
5189 BasicBlock *DispatchEnter = splitBB(Builder,
true);
5190 Value *DispatchCounter;
5195 CanonicalLoopInfo *DispatchCLI =
cantFail(createCanonicalLoop(
5196 {Builder.saveIP(),
DL},
5197 [&](InsertPointTy BodyIP,
Value *Counter) {
5198 DispatchCounter = Counter;
5201 FirstChunkStart, CastedTripCount, NextChunkStride,
// Remember then dissolve the dispatch skeleton; its blocks are rewired
// around the original loop below.
5207 BasicBlock *DispatchBody = DispatchCLI->getBody();
5208 BasicBlock *DispatchLatch = DispatchCLI->getLatch();
5209 BasicBlock *DispatchExit = DispatchCLI->getExit();
5210 BasicBlock *DispatchAfter = DispatchCLI->getAfter();
5211 DispatchCLI->invalidate();
5219 Builder.restoreIP(CLI->getPreheaderIP());
5220 Builder.SetCurrentDebugLocation(
DL);
// Per-chunk trip count: min(ChunkRange, TripCount - chunk start).
5223 Builder.SetInsertPoint(CLI->getPreheader()->getTerminator());
5224 Value *ChunkEnd = Builder.CreateAdd(DispatchCounter, ChunkRange);
5225 Value *IsLastChunk =
5226 Builder.CreateICmpUGE(ChunkEnd, CastedTripCount,
"omp_chunk.is_last");
5227 Value *CountUntilOrigTripCount =
5228 Builder.CreateSub(CastedTripCount, DispatchCounter);
5229 Value *ChunkTripCount = Builder.CreateSelect(
5230 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
5231 Value *BackcastedChunkTC =
5232 Builder.CreateTrunc(ChunkTripCount, IVTy,
"omp_chunk.tripcount.trunc");
5233 CLI->setTripCount(BackcastedChunkTC);
// Rebase the inner IV by the current chunk's start position.
5238 Value *BackcastedDispatchCounter =
5239 Builder.CreateTrunc(DispatchCounter, IVTy,
"omp_dispatch.iv.trunc");
5241 Builder.restoreIP(CLI->getBodyIP());
5242 return Builder.CreateAdd(
IV, BackcastedDispatchCounter);
// Close the static region; barrier afterwards if the caller requires it.
5247 createRuntimeFunctionCall(StaticFini, {SrcLoc, ThreadNum});
5251 InsertPointOrErrorTy AfterIP =
5252 createBarrier(LocationDescription(Builder.saveIP(),
DL), OMPD_for,
5255 return AfterIP.takeError();
5273 unsigned Bitwidth = Ty->getIntegerBitWidth();
5274 Module &M = OMPBuilder->M;
5276 case WorksharingLoopType::ForStaticLoop:
5278 return OMPBuilder->getOrCreateRuntimeFunction(
5279 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_4u);
5281 return OMPBuilder->getOrCreateRuntimeFunction(
5282 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_loop_8u);
5284 case WorksharingLoopType::DistributeStaticLoop:
5286 return OMPBuilder->getOrCreateRuntimeFunction(
5287 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_4u);
5289 return OMPBuilder->getOrCreateRuntimeFunction(
5290 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_static_loop_8u);
5292 case WorksharingLoopType::DistributeForStaticLoop:
5294 return OMPBuilder->getOrCreateRuntimeFunction(
5295 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_4u);
5297 return OMPBuilder->getOrCreateRuntimeFunction(
5298 M, omp::RuntimeFunction::OMPRTL___kmpc_distribute_for_static_loop_8u);
5301 if (Bitwidth != 32 && Bitwidth != 64) {
5313 Function &LoopBodyFn,
bool NoLoop) {
5315 Module &M = OMPBuilder->M;
5324 if (LoopType == WorksharingLoopType::DistributeStaticLoop) {
5325 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5326 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5327 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5328 OMPBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
5331 FunctionCallee RTLNumThreads = OMPBuilder->getOrCreateRuntimeFunction(
5332 M, omp::RuntimeFunction::OMPRTL_omp_get_num_threads);
5333 Builder.restoreIP({InsertBlock, std::prev(InsertBlock->
end())});
5334 Value *NumThreads = OMPBuilder->createRuntimeFunctionCall(RTLNumThreads, {});
5337 Builder.CreateZExtOrTrunc(NumThreads, TripCountTy,
"num.threads.cast"));
5338 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5339 if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
5340 RealArgs.
push_back(ConstantInt::get(TripCountTy, 0));
5341 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), NoLoop));
5343 RealArgs.
push_back(ConstantInt::get(Builder.getInt8Ty(), 0));
5346 OMPBuilder->createRuntimeFunctionCall(RTLFn, RealArgs);
5350 OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI,
Value *Ident,
5355 Value *TripCount = CLI->getTripCount();
5361 Preheader->
splice(std::prev(Preheader->
end()), CLI->getBody(),
5362 CLI->getBody()->begin(), std::prev(CLI->getBody()->end()));
5367 Builder.restoreIP({Preheader, Preheader->
end()});
5370 Builder.CreateBr(CLI->getExit());
5373 OpenMPIRBuilder::OutlineInfo CleanUpInfo;
5376 CleanUpInfo.EntryBB = CLI->getHeader();
5377 CleanUpInfo.ExitBB = CLI->getExit();
5378 CleanUpInfo.collectBlocks(RegionBlockSet, BlocksToBeRemoved);
5386 "Expected unique undroppable user of outlined function");
5388 assert(OutlinedFnCallInstruction &&
"Expected outlined function call");
5390 "Expected outlined function call to be located in loop preheader");
5392 if (OutlinedFnCallInstruction->
arg_size() > 1)
5399 LoopBodyArg, TripCount, OutlinedFn, NoLoop);
5401 for (
auto &ToBeDeletedItem : ToBeDeleted)
5402 ToBeDeletedItem->eraseFromParent();
// Device-side lowering of a worksharing loop: outline the loop body into a
// function (via OutlineInfo), replace direct uses of the induction variable
// with a load of a fresh loop-counter alloca inside the outlined region, and
// let PostOutlineCB wire the __kmpc_*_loop runtime call afterwards.
// NOTE(review): declarations of OI, ParallelRegionBlockSet, Blocks,
// Extractor, CEAC, ToBeDeleted and the surrounding loops/branches are elided
// in this extraction.
5406OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::applyWorkshareLoopTarget(
5407 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5410 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5411 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
// Choose the outer alloca block (preheader vs. caller-provided IP).
5414 OI.OuterAllocaBB = CLI->getPreheader();
5420 OI.OuterAllocaBB = AllocaIP.getBlock();
// The outlined region spans the loop body up to a fresh pre-latch split.
5423 OI.EntryBB = CLI->getBody();
5424 OI.ExitBB = CLI->getLatch()->splitBasicBlock(CLI->getLatch()->begin(),
5425 "omp.prelatch",
true);
// New counter alloca + load that will stand in for the induction variable
// inside the outlined function.
5428 Builder.restoreIP({CLI->getPreheader(), CLI->getPreheader()->begin()});
5432 AllocaInst *NewLoopCnt = Builder.CreateAlloca(CLI->getIndVarType(), 0,
"");
5434 Builder.CreateLoad(CLI->getIndVarType(), NewLoopCnt);
5445 OI.collectBlocks(ParallelRegionBlockSet, Blocks);
5456 CLI->getPreheader(),
5465 Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
// Rewrite IV uses that live inside the outlined region.
5471 CLI->getIndVar()->user_end());
5474 if (ParallelRegionBlockSet.
count(Inst->getParent())) {
5475 Inst->replaceUsesOfWith(CLI->getIndVar(), NewLoopCntLoad);
// The counter load is passed as its own argument, not via the aggregate.
5481 OI.ExcludeArgsFromAggregate.push_back(NewLoopCntLoad);
// Deferred cleanup + runtime-call emission once outlining has happened.
5488 OI.PostOutlineCB = [=, ToBeDeletedVec =
5489 std::move(ToBeDeleted)](
Function &OutlinedFn) {
5493 addOutlineInfo(std::move(OI));
5494 return CLI->getAfterIP();
5497OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
5498 DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
5499 bool NeedsBarrier, omp::ScheduleKind SchedKind,
Value *ChunkSize,
5500 bool HasSimdModifier,
bool HasMonotonicModifier,
5501 bool HasNonmonotonicModifier,
bool HasOrderedClause,
5503 Value *DistScheduleChunkSize) {
5504 if (Config.isTargetDevice())
5505 return applyWorkshareLoopTarget(
DL, CLI, AllocaIP, LoopType, NoLoop);
5507 SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5508 HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
5510 bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
5511 OMPScheduleType::ModifierOrdered;
5513 if (HasDistSchedule) {
5514 DistScheduleSchedType = DistScheduleChunkSize
5515 ? OMPScheduleType::OrderedDistributeChunked
5516 : OMPScheduleType::OrderedDistribute;
5518 switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
5519 case OMPScheduleType::BaseStatic:
5520 case OMPScheduleType::BaseDistribute:
5521 assert((!ChunkSize || !DistScheduleChunkSize) &&
5522 "No chunk size with static-chunked schedule");
5523 if (IsOrdered && !HasDistSchedule)
5524 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5525 NeedsBarrier, ChunkSize);
5527 if (DistScheduleChunkSize)
5528 return applyStaticChunkedWorkshareLoop(
5529 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
5530 DistScheduleChunkSize, DistScheduleSchedType);
5531 return applyStaticWorkshareLoop(
DL, CLI, AllocaIP, LoopType, NeedsBarrier,
5534 case OMPScheduleType::BaseStaticChunked:
5535 case OMPScheduleType::BaseDistributeChunked:
5536 if (IsOrdered && !HasDistSchedule)
5537 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5538 NeedsBarrier, ChunkSize);
5540 return applyStaticChunkedWorkshareLoop(
5541 DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
5542 DistScheduleChunkSize, DistScheduleSchedType);
5544 case OMPScheduleType::BaseRuntime:
5545 case OMPScheduleType::BaseAuto:
5546 case OMPScheduleType::BaseGreedy:
5547 case OMPScheduleType::BaseBalanced:
5548 case OMPScheduleType::BaseSteal:
5549 case OMPScheduleType::BaseGuidedSimd:
5550 case OMPScheduleType::BaseRuntimeSimd:
5552 "schedule type does not support user-defined chunk sizes");
5554 case OMPScheduleType::BaseDynamicChunked:
5555 case OMPScheduleType::BaseGuidedChunked:
5556 case OMPScheduleType::BaseGuidedIterativeChunked:
5557 case OMPScheduleType::BaseGuidedAnalyticalChunked:
5558 case OMPScheduleType::BaseStaticBalancedChunked:
5559 return applyDynamicWorkshareLoop(
DL, CLI, AllocaIP, EffectiveScheduleType,
5560 NeedsBarrier, ChunkSize);
5573 unsigned Bitwidth = Ty->getIntegerBitWidth();
5575 return OMPBuilder.getOrCreateRuntimeFunction(
5576 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u);
5578 return OMPBuilder.getOrCreateRuntimeFunction(
5579 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u);
5589 unsigned Bitwidth = Ty->getIntegerBitWidth();
5591 return OMPBuilder.getOrCreateRuntimeFunction(
5592 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u);
5594 return OMPBuilder.getOrCreateRuntimeFunction(
5595 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u);
5604 unsigned Bitwidth = Ty->getIntegerBitWidth();
5606 return OMPBuilder.getOrCreateRuntimeFunction(
5607 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_4u);
5609 return OMPBuilder.getOrCreateRuntimeFunction(
5610 M, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_fini_8u);
5614OpenMPIRBuilder::InsertPointOrErrorTy
5615OpenMPIRBuilder::applyDynamicWorkshareLoop(
DebugLoc DL, CanonicalLoopInfo *CLI,
5616 InsertPointTy AllocaIP,
5618 bool NeedsBarrier,
Value *Chunk) {
5619 assert(CLI->isValid() &&
"Requires a valid canonical loop");
5621 "Require dedicated allocate IP");
5623 "Require valid schedule type");
5625 bool Ordered = (SchedType & OMPScheduleType::ModifierOrdered) ==
5626 OMPScheduleType::ModifierOrdered;
5629 Builder.SetCurrentDebugLocation(
DL);
5632 Constant *SrcLocStr = getOrCreateSrcLocStr(
DL, SrcLocStrSize);
5633 Value *SrcLoc = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
5637 Type *IVTy =
IV->getType();
5642 Builder.SetInsertPoint(AllocaIP.getBlock()->getFirstNonPHIOrDbgOrAlloca());
5644 Value *PLastIter = Builder.CreateAlloca(I32Type,
nullptr,
"p.lastiter");
5645 Value *PLowerBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.lowerbound");
5646 Value *PUpperBound = Builder.CreateAlloca(IVTy,
nullptr,
"p.upperbound");
5647 Value *PStride = Builder.CreateAlloca(IVTy,
nullptr,
"p.stride");
5648 CLI->setLastIter(PLastIter);
5656 Constant *One = ConstantInt::get(IVTy, 1);
5657 Builder.CreateStore(One, PLowerBound);
5658 Value *UpperBound = CLI->getTripCount();
5659 Builder.CreateStore(UpperBound, PUpperBound);
5660 Builder.CreateStore(One, PStride);
5666 InsertPointTy AfterIP = CLI->getAfterIP();
5674 Value *ThreadNum = getOrCreateThreadID(SrcLoc);
5677 ConstantInt::get(I32Type,
static_cast<int>(SchedType));
5680 createRuntimeFunctionCall(DynamicInit, {SrcLoc, ThreadNum, SchedulingType,
5689 Builder.SetInsertPoint(OuterCond, OuterCond->getFirstInsertionPt());
5690 Value *Res = createRuntimeFunctionCall(
5692 {SrcLoc, ThreadNum, PLastIter, PLowerBound, PUpperBound, PStride});
5693 Constant *Zero32 = ConstantInt::get(I32Type, 0);
5696 Builder.CreateSub(Builder.CreateLoad(IVTy, PLowerBound), One,
"lb");
5697 Builder.CreateCondBr(MoreWork, Header, Exit);
5703 PI->setIncomingBlock(0, OuterCond);
5704 PI->setIncomingValue(0, LowerBound);
5709 Br->setSuccessor(0, OuterCond);
5714 Builder.SetInsertPoint(
Cond,
Cond->getFirstInsertionPt());
5715 UpperBound = Builder.CreateLoad(IVTy, PUpperBound,
"ub");
5718 CI->setOperand(1, UpperBound);
5722 assert(BI->getSuccessor(1) == Exit);
5723 BI->setSuccessor(1, OuterCond);
5727 Builder.SetInsertPoint(&Latch->
back());
5729 createRuntimeFunctionCall(DynamicFini, {SrcLoc, ThreadNum});
5734 Builder.SetInsertPoint(&
Exit->back());
5735 InsertPointOrErrorTy BarrierIP =
5736 createBarrier(LocationDescription(Builder.saveIP(),
DL),
5737 omp::Directive::OMPD_for,
false,
5740 return BarrierIP.takeError();
5759 auto HasRemainingUses = [&BBsToErase](
BasicBlock *BB) {
5764 if (BBsToErase.
count(UseInst->getParent()))
5771 while (BBsToErase.
remove_if(HasRemainingUses)) {
5781 InsertPointTy ComputeIP) {
5782 assert(
Loops.size() >= 1 &&
"At least one loop required");
5783 size_t NumLoops =
Loops.size();
5787 return Loops.front();
5789 CanonicalLoopInfo *Outermost =
Loops.front();
5790 CanonicalLoopInfo *Innermost =
Loops.back();
5791 BasicBlock *OrigPreheader = Outermost->getPreheader();
5792 BasicBlock *OrigAfter = Outermost->getAfter();
5799 Loop->collectControlBlocks(OldControlBBs);
5802 Builder.SetCurrentDebugLocation(
DL);
5803 if (ComputeIP.isSet())
5804 Builder.restoreIP(ComputeIP);
5806 Builder.restoreIP(Outermost->getPreheaderIP());
5810 Value *CollapsedTripCount =
nullptr;
5811 for (CanonicalLoopInfo *L :
Loops) {
5813 "All loops to collapse must be valid canonical loops");
5814 Value *OrigTripCount =
L->getTripCount();
5815 if (!CollapsedTripCount) {
5816 CollapsedTripCount = OrigTripCount;
5821 CollapsedTripCount =
5822 Builder.CreateNUWMul(CollapsedTripCount, OrigTripCount);
5826 CanonicalLoopInfo *
Result =
5827 createLoopSkeleton(
DL, CollapsedTripCount,
F,
5828 OrigPreheader->
getNextNode(), OrigAfter,
"collapsed");
5834 Builder.restoreIP(
Result->getBodyIP());
5838 NewIndVars.
resize(NumLoops);
5839 for (
int i = NumLoops - 1; i >= 1; --i) {
5840 Value *OrigTripCount =
Loops[i]->getTripCount();
5842 Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
5843 NewIndVars[i] = NewIndVar;
5845 Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
5848 NewIndVars[0] = Leftover;
5859 auto ContinueWith = [&ContinueBlock, &ContinuePred,
DL](
BasicBlock *Dest,
5866 ContinueBlock =
nullptr;
5867 ContinuePred = NextSrc;
5874 for (
size_t i = 0; i < NumLoops - 1; ++i)
5875 ContinueWith(
Loops[i]->getBody(),
Loops[i + 1]->getHeader());
5878 ContinueWith(Innermost->getBody(), Innermost->getLatch());
5881 for (
size_t i = NumLoops - 1; i > 0; --i)
5882 ContinueWith(
Loops[i]->getAfter(),
Loops[i - 1]->getLatch());
5885 ContinueWith(
Result->getLatch(),
nullptr);
5892 for (
size_t i = 0; i < NumLoops; ++i)
5893 Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);
5898 for (CanonicalLoopInfo *L :
Loops)
5907std::vector<CanonicalLoopInfo *>
5911 "Must pass as many tile sizes as there are loops");
5912 int NumLoops =
Loops.size();
5913 assert(NumLoops >= 1 &&
"At least one loop to tile required");
5915 CanonicalLoopInfo *OutermostLoop =
Loops.front();
5916 CanonicalLoopInfo *InnermostLoop =
Loops.back();
5917 Function *
F = OutermostLoop->getBody()->getParent();
5918 BasicBlock *InnerEnter = InnermostLoop->getBody();
5919 BasicBlock *InnerLatch = InnermostLoop->getLatch();
5925 Loop->collectControlBlocks(OldControlBBs);
5932 for (CanonicalLoopInfo *L :
Loops) {
5933 assert(
L->isValid() &&
"All input loops must be valid canonical loops");
5945 for (
int i = 0; i < NumLoops - 1; ++i) {
5946 CanonicalLoopInfo *Surrounding =
Loops[i];
5949 BasicBlock *EnterBB = Surrounding->getBody();
5955 Builder.SetCurrentDebugLocation(
DL);
5956 Builder.restoreIP(OutermostLoop->getPreheaderIP());
5958 for (
int i = 0; i < NumLoops; ++i) {
5960 Value *OrigTripCount = OrigTripCounts[i];
5963 Value *FloorCompleteTripCount = Builder.CreateUDiv(OrigTripCount,
TileSize);
5964 Value *FloorTripRem = Builder.CreateURem(OrigTripCount,
TileSize);
5973 Value *FloorTripOverflow =
5974 Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));
5976 FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
5977 Value *FloorTripCount =
5978 Builder.CreateAdd(FloorCompleteTripCount, FloorTripOverflow,
5979 "omp_floor" +
Twine(i) +
".tripcount",
true);
5982 FloorCompleteCount.
push_back(FloorCompleteTripCount);
5988 std::vector<CanonicalLoopInfo *>
Result;
5989 Result.reserve(NumLoops * 2);
5993 BasicBlock *Enter = OutermostLoop->getPreheader();
6000 BasicBlock *OutroInsertBefore = InnermostLoop->getExit();
6002 auto EmbeddNewLoop =
6003 [
this,
DL,
F, InnerEnter, &Enter, &
Continue, &OutroInsertBefore](
6005 CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
6006 DL, TripCount,
F, InnerEnter, OutroInsertBefore, Name);
6011 Enter = EmbeddedLoop->getBody();
6012 Continue = EmbeddedLoop->getLatch();
6013 OutroInsertBefore = EmbeddedLoop->getLatch();
6014 return EmbeddedLoop;
6018 const Twine &NameBase) {
6020 CanonicalLoopInfo *EmbeddedLoop =
6021 EmbeddNewLoop(
P.value(), NameBase +
Twine(
P.index()));
6022 Result.push_back(EmbeddedLoop);
6026 EmbeddNewLoops(FloorCount,
"floor");
6030 Builder.SetInsertPoint(Enter->getTerminator());
6032 for (
int i = 0; i < NumLoops; ++i) {
6033 CanonicalLoopInfo *FloorLoop =
Result[i];
6036 Value *FloorIsEpilogue =
6037 Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCompleteCount[i]);
6038 Value *TileTripCount =
6039 Builder.CreateSelect(FloorIsEpilogue, FloorRems[i],
TileSize);
6045 EmbeddNewLoops(TileCounts,
"tile");
6050 for (std::pair<BasicBlock *, BasicBlock *>
P : InbetweenCode) {
6059 BodyEnter =
nullptr;
6060 BodyEntered = ExitBB;
6072 Builder.restoreIP(
Result.back()->getBodyIP());
6073 for (
int i = 0; i < NumLoops; ++i) {
6074 CanonicalLoopInfo *FloorLoop =
Result[i];
6075 CanonicalLoopInfo *TileLoop =
Result[NumLoops + i];
6076 Value *OrigIndVar = OrigIndVars[i];
6080 Builder.CreateMul(
Size, FloorLoop->getIndVar(), {},
true);
6082 Builder.CreateAdd(Scale, TileLoop->getIndVar(), {},
true);
6089 for (CanonicalLoopInfo *L :
Loops)
6093 for (CanonicalLoopInfo *GenL : Result)
6104 if (Properties.
empty())
6127 assert(
Loop->isValid() &&
"Expecting a valid CanonicalLoopInfo");
6131 assert(Latch &&
"A valid CanonicalLoopInfo must have a unique latch");
6139 if (
I.mayReadOrWriteMemory()) {
6143 I.setMetadata(LLVMContext::MD_access_group, AccessGroup);
6148void OpenMPIRBuilder::unrollLoopFull(
DebugLoc, CanonicalLoopInfo *
Loop) {
6155void OpenMPIRBuilder::unrollLoopHeuristic(
DebugLoc, CanonicalLoopInfo *
Loop) {
6163void OpenMPIRBuilder::createIfVersion(CanonicalLoopInfo *CanonicalLoop,
6166 const Twine &NamePrefix) {
6167 Function *
F = CanonicalLoop->getFunction();
6189 auto SplitBeforeIt = CanonicalLoop->getBody()->getFirstNonPHIIt();
6195 C, NamePrefix +
".if.then",
Cond->getParent(),
Cond->getNextNode());
6197 C, NamePrefix +
".if.else",
Cond->getParent(), CanonicalLoop->getExit());
6200 Builder.SetInsertPoint(SplitBeforeIt);
6202 Builder.CreateCondBr(IfCond, ThenBlock, ElseBlock);
6205 spliceBB(IP, ThenBlock,
false, Builder.getCurrentDebugLocation());
6208 Builder.SetInsertPoint(ElseBlock);
6214 ExistingBlocks.
reserve(
L->getNumBlocks() + 1);
6216 ExistingBlocks.
append(
L->block_begin(),
L->block_end());
6222 assert(LoopCond && LoopHeader &&
"Invalid loop structure");
6224 if (
Block ==
L->getLoopPreheader() ||
Block ==
L->getLoopLatch() ||
6231 if (
Block == ThenBlock)
6232 NewBB->
setName(NamePrefix +
".if.else");
6235 VMap[
Block] = NewBB;
6239 Builder.CreateBr(NewBlocks.
front());
6243 L->getLoopLatch()->splitBasicBlock(
6244 L->getLoopLatch()->begin(), NamePrefix +
".pre_latch",
true);
6248 L->addBasicBlockToLoop(ThenBlock, LI);
6252OpenMPIRBuilder::getOpenMPDefaultSimdAlign(
const Triple &TargetTriple,
6254 if (TargetTriple.
isX86()) {
6255 if (Features.
lookup(
"avx512f"))
6257 else if (Features.
lookup(
"avx"))
6261 if (TargetTriple.
isPPC())
6263 if (TargetTriple.
isWasm())
6268void OpenMPIRBuilder::applySimd(CanonicalLoopInfo *CanonicalLoop,
6270 Value *IfCond, OrderKind Order,
6274 Function *
F = CanonicalLoop->getFunction();
6289 if (AlignedVars.
size()) {
6290 InsertPointTy IP = Builder.saveIP();
6291 for (
auto &AlignedItem : AlignedVars) {
6292 Value *AlignedPtr = AlignedItem.first;
6293 Value *Alignment = AlignedItem.second;
6296 Builder.CreateAlignmentAssumption(
F->getDataLayout(), AlignedPtr,
6299 Builder.restoreIP(IP);
6304 createIfVersion(CanonicalLoop, IfCond, VMap, LIA, LI, L,
"simd");
6314 if (
Block == CanonicalLoop->getCond() ||
6315 Block == CanonicalLoop->getHeader())
6317 Reachable.insert(
Block);
6327 if ((Safelen ==
nullptr) || (Order == OrderKind::OMP_ORDER_concurrent))
6343 Ctx, {
MDString::get(Ctx,
"llvm.loop.vectorize.enable"), BoolConst}));
6345 if (Simdlen || Safelen) {
6349 ConstantInt *VectorizeWidth = Simdlen ==
nullptr ? Safelen : Simdlen;
6375static std::unique_ptr<TargetMachine>
6379 StringRef CPU =
F->getFnAttribute(
"target-cpu").getValueAsString();
6380 StringRef Features =
F->getFnAttribute(
"target-features").getValueAsString();
6391 std::nullopt, OptLevel));
6415 [&](
const Function &
F) {
return TM->getTargetTransformInfo(
F); });
6416 FAM.registerPass([&]() {
return TIRA; });
6430 assert(L &&
"Expecting CanonicalLoopInfo to be recognized as a loop");
6435 nullptr, ORE,
static_cast<int>(OptLevel),
6456 <<
" Threshold=" << UP.
Threshold <<
"\n"
6459 <<
" PartialOptSizeThreshold="
6479 Ptr = Load->getPointerOperand();
6481 Ptr = Store->getPointerOperand();
6488 if (Alloca->getParent() == &
F->getEntryBlock())
6508 int MaxTripCount = 0;
6509 bool MaxOrZero =
false;
6510 unsigned TripMultiple = 0;
6512 bool UseUpperBound =
false;
6514 MaxTripCount, MaxOrZero, TripMultiple, UCE, UP, PP,
6516 unsigned Factor = UP.
Count;
6517 LLVM_DEBUG(
dbgs() <<
"Suggesting unroll factor of " << Factor <<
"\n");
6525void OpenMPIRBuilder::unrollLoopPartial(
DebugLoc DL, CanonicalLoopInfo *
Loop,
6527 CanonicalLoopInfo **UnrolledCLI) {
6528 assert(Factor >= 0 &&
"Unroll factor must not be negative");
6544 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst}));
6557 *UnrolledCLI =
Loop;
6562 "unrolling only makes sense with a factor of 2 or larger");
6564 Type *IndVarTy =
Loop->getIndVarType();
6571 std::vector<CanonicalLoopInfo *>
LoopNest =
6572 tileLoops(
DL, {
Loop}, {FactorVal});
6575 CanonicalLoopInfo *InnerLoop =
LoopNest[1];
6586 Ctx, {
MDString::get(Ctx,
"llvm.loop.unroll.count"), FactorConst})});
6589 (*UnrolledCLI)->assertOK();
6593OpenMPIRBuilder::InsertPointTy
6594OpenMPIRBuilder::createCopyPrivate(
const LocationDescription &
Loc,
6597 if (!updateToLocation(
Loc))
6601 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6602 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6603 Value *ThreadId = getOrCreateThreadID(Ident);
6605 llvm::Value *DidItLD = Builder.CreateLoad(Builder.getInt32Ty(), DidIt);
6607 Value *
Args[] = {Ident, ThreadId, BufSize, CpyBuf, CpyFn, DidItLD};
6609 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate);
6610 createRuntimeFunctionCall(Fn, Args);
6612 return Builder.saveIP();
6615OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createSingle(
6616 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6620 if (!updateToLocation(
Loc))
6626 if (!CPVars.
empty()) {
6628 Builder.CreateStore(Builder.getInt32(0), DidIt);
6631 Directive OMPD = Directive::OMPD_single;
6633 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6634 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6635 Value *ThreadId = getOrCreateThreadID(Ident);
6638 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
6639 Instruction *EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
6641 Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
6642 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
6644 auto FiniCBWrapper = [&](InsertPointTy IP) ->
Error {
6645 if (
Error Err = FiniCB(IP))
6652 Builder.CreateStore(Builder.getInt32(1), DidIt);
6665 InsertPointOrErrorTy AfterIP =
6666 EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCBWrapper,
6670 return AfterIP.takeError();
6673 for (
size_t I = 0,
E = CPVars.
size();
I <
E; ++
I)
6675 createCopyPrivate(LocationDescription(Builder.saveIP(),
Loc.DL),
6676 ConstantInt::get(Int64, 0), CPVars[
I],
6679 }
else if (!IsNowait) {
6680 InsertPointOrErrorTy AfterIP =
6681 createBarrier(LocationDescription(Builder.saveIP(),
Loc.DL),
6682 omp::Directive::OMPD_unknown,
false,
6685 return AfterIP.takeError();
6687 return Builder.saveIP();
6690OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createCritical(
6691 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6692 FinalizeCallbackTy FiniCB,
StringRef CriticalName,
Value *HintInst) {
6694 if (!updateToLocation(
Loc))
6697 Directive OMPD = Directive::OMPD_critical;
6699 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6700 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6701 Value *ThreadId = getOrCreateThreadID(Ident);
6702 Value *LockVar = getOMPCriticalRegionLock(CriticalName);
6703 Value *
Args[] = {Ident, ThreadId, LockVar};
6709 EnterArgs.push_back(HintInst);
6710 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
6712 RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
6714 Instruction *EntryCall = createRuntimeFunctionCall(RTFn, EnterArgs);
6717 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
6718 Instruction *ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
6720 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6724OpenMPIRBuilder::InsertPointTy
6725OpenMPIRBuilder::createOrderedDepend(
const LocationDescription &
Loc,
6726 InsertPointTy AllocaIP,
unsigned NumLoops,
6728 const Twine &Name,
bool IsDependSource) {
6732 "OpenMP runtime requires depend vec with i64 type");
6734 if (!updateToLocation(
Loc))
6739 Builder.restoreIP(AllocaIP);
6740 AllocaInst *ArgsBase = Builder.CreateAlloca(ArrI64Ty,
nullptr, Name);
6742 updateToLocation(
Loc);
6745 for (
unsigned I = 0;
I < NumLoops; ++
I) {
6746 Value *DependAddrGEPIter = Builder.CreateInBoundsGEP(
6747 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(
I)});
6748 StoreInst *STInst = Builder.CreateStore(StoreValues[
I], DependAddrGEPIter);
6752 Value *DependBaseAddrGEP = Builder.CreateInBoundsGEP(
6753 ArrI64Ty, ArgsBase, {Builder.getInt64(0), Builder.getInt64(0)});
6756 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6757 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6758 Value *ThreadId = getOrCreateThreadID(Ident);
6759 Value *
Args[] = {Ident, ThreadId, DependBaseAddrGEP};
6763 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_post);
6765 RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_doacross_wait);
6766 createRuntimeFunctionCall(RTLFn, Args);
6768 return Builder.saveIP();
6771OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createOrderedThreadsSimd(
6772 const LocationDescription &
Loc, BodyGenCallbackTy BodyGenCB,
6773 FinalizeCallbackTy FiniCB,
bool IsThreads) {
6774 if (!updateToLocation(
Loc))
6777 Directive OMPD = Directive::OMPD_ordered;
6783 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6784 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6785 Value *ThreadId = getOrCreateThreadID(Ident);
6788 Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_ordered);
6789 EntryCall = createRuntimeFunctionCall(EntryRTLFn, Args);
6792 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_ordered);
6793 ExitCall = createRuntimeFunctionCall(ExitRTLFn, Args);
6796 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
6800OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::EmitOMPInlinedRegion(
6802 BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB,
bool Conditional,
6803 bool HasFinalize,
bool IsCancellable) {
6806 FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});
6810 BasicBlock *EntryBB = Builder.GetInsertBlock();
6819 emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);
6822 if (
Error Err = BodyGenCB( InsertPointTy(),
6830 "Unexpected control flow graph state!!");
6831 InsertPointOrErrorTy AfterIP =
6832 emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
6834 return AfterIP.takeError();
6839 "Unexpected Insertion point location!");
6842 auto InsertBB = merged ? ExitPredBB : ExitBB;
6845 Builder.SetInsertPoint(InsertBB);
6847 return Builder.saveIP();
6850OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
6853 if (!Conditional || !EntryCall)
6854 return Builder.saveIP();
6856 BasicBlock *EntryBB = Builder.GetInsertBlock();
6857 Value *CallBool = Builder.CreateIsNotNull(EntryCall);
6869 Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
6871 Builder.SetInsertPoint(UI);
6872 Builder.Insert(EntryBBTI);
6873 UI->eraseFromParent();
6880OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitCommonDirectiveExit(
6881 omp::Directive OMPD, InsertPointTy FinIP,
Instruction *ExitCall,
6884 Builder.restoreIP(FinIP);
6888 assert(!FinalizationStack.empty() &&
6889 "Unexpected finalization stack state!");
6891 FinalizationInfo Fi = FinalizationStack.pop_back_val();
6892 assert(Fi.DK == OMPD &&
"Unexpected Directive for Finalization call!");
6894 if (
Error Err = Fi.mergeFiniBB(Builder, FinIP.getBlock()))
6895 return std::move(Err);
6899 Builder.SetInsertPoint(FinIP.getBlock()->getTerminator());
6903 return Builder.saveIP();
6907 Builder.Insert(ExitCall);
6913OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
6914 InsertPointTy IP,
Value *MasterAddr,
Value *PrivateAddr,
6943 "copyin.not.master.end");
6950 Builder.SetInsertPoint(OMP_Entry);
6951 Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
6952 Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
6953 Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
6954 Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);
6956 Builder.SetInsertPoint(CopyBegin);
6958 Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));
6960 return Builder.saveIP();
6963CallInst *OpenMPIRBuilder::createOMPAlloc(
const LocationDescription &
Loc,
6967 updateToLocation(
Loc);
6970 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6971 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6972 Value *ThreadId = getOrCreateThreadID(Ident);
6975 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);
6977 return createRuntimeFunctionCall(Fn, Args, Name);
6980CallInst *OpenMPIRBuilder::createOMPFree(
const LocationDescription &
Loc,
6984 updateToLocation(
Loc);
6987 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
6988 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
6989 Value *ThreadId = getOrCreateThreadID(Ident);
6991 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free);
6992 return createRuntimeFunctionCall(Fn, Args, Name);
6995CallInst *OpenMPIRBuilder::createOMPInteropInit(
6996 const LocationDescription &
Loc,
Value *InteropVar,
6998 Value *DependenceAddress,
bool HaveNowaitClause) {
7000 updateToLocation(
Loc);
7003 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7004 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7005 Value *ThreadId = getOrCreateThreadID(Ident);
7006 if (Device ==
nullptr)
7008 Constant *InteropTypeVal = ConstantInt::get(Int32, (
int)InteropType);
7009 if (NumDependences ==
nullptr) {
7010 NumDependences = ConstantInt::get(Int32, 0);
7014 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7016 Ident, ThreadId, InteropVar, InteropTypeVal,
7017 Device, NumDependences, DependenceAddress, HaveNowaitClauseVal};
7019 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_init);
7021 return createRuntimeFunctionCall(Fn, Args);
7024CallInst *OpenMPIRBuilder::createOMPInteropDestroy(
7025 const LocationDescription &
Loc,
Value *InteropVar,
Value *Device,
7026 Value *NumDependences,
Value *DependenceAddress,
bool HaveNowaitClause) {
7028 updateToLocation(
Loc);
7031 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7032 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7033 Value *ThreadId = getOrCreateThreadID(Ident);
7034 if (Device ==
nullptr)
7036 if (NumDependences ==
nullptr) {
7037 NumDependences = ConstantInt::get(Int32, 0);
7041 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7043 Ident, ThreadId, InteropVar,
Device,
7044 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7046 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_destroy);
7048 return createRuntimeFunctionCall(Fn, Args);
7051CallInst *OpenMPIRBuilder::createOMPInteropUse(
const LocationDescription &
Loc,
7053 Value *NumDependences,
7054 Value *DependenceAddress,
7055 bool HaveNowaitClause) {
7057 updateToLocation(
Loc);
7059 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7060 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7061 Value *ThreadId = getOrCreateThreadID(Ident);
7062 if (Device ==
nullptr)
7064 if (NumDependences ==
nullptr) {
7065 NumDependences = ConstantInt::get(Int32, 0);
7069 Value *HaveNowaitClauseVal = ConstantInt::get(Int32, HaveNowaitClause);
7071 Ident, ThreadId, InteropVar,
Device,
7072 NumDependences, DependenceAddress, HaveNowaitClauseVal};
7074 Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___tgt_interop_use);
7076 return createRuntimeFunctionCall(Fn, Args);
7079CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
7083 updateToLocation(
Loc);
7086 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7087 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7088 Value *ThreadId = getOrCreateThreadID(Ident);
7090 getOrCreateInternalVariable(Int8PtrPtr,
Name.str());
7094 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);
7096 return createRuntimeFunctionCall(Fn, Args);
7099OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createTargetInit(
7100 const LocationDescription &
Loc,
7101 const llvm::OpenMPIRBuilder::TargetKernelDefaultAttrs &Attrs) {
7103 "expected num_threads and num_teams to be specified");
7105 if (!updateToLocation(
Loc))
7109 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7110 Constant *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7122 const std::string DebugPrefix =
"_debug__";
7123 if (KernelName.
ends_with(DebugPrefix)) {
7124 KernelName = KernelName.
drop_back(DebugPrefix.length());
7125 Kernel = M.getFunction(KernelName);
7131 if (
Attrs.MinTeams > 1 ||
Attrs.MaxTeams.front() > 0)
7136 int32_t MaxThreadsVal =
Attrs.MaxThreads.front();
7137 if (MaxThreadsVal < 0)
7138 MaxThreadsVal = std::max(
7141 if (MaxThreadsVal > 0)
7142 writeThreadBoundsForKernel(
T, *
Kernel,
Attrs.MinThreads, MaxThreadsVal);
7153 Function *Fn = getOrCreateRuntimeFunctionPtr(
7154 omp::RuntimeFunction::OMPRTL___kmpc_target_init);
7157 Twine DynamicEnvironmentName = KernelName +
"_dynamic_environment";
7158 Constant *DynamicEnvironmentInitializer =
7162 DynamicEnvironmentInitializer, DynamicEnvironmentName,
7164 DL.getDefaultGlobalsAddressSpace());
7168 DynamicEnvironmentGV->
getType() == DynamicEnvironmentPtr
7169 ? DynamicEnvironmentGV
7171 DynamicEnvironmentPtr);
7174 ConfigurationEnvironment, {
7175 UseGenericStateMachineVal,
7176 MayUseNestedParallelismVal,
7183 ReductionBufferLength,
7186 KernelEnvironment, {
7187 ConfigurationEnvironmentInitializer,
7191 std::string KernelEnvironmentName =
7192 (KernelName +
"_kernel_environment").str();
7195 KernelEnvironmentInitializer, KernelEnvironmentName,
7197 DL.getDefaultGlobalsAddressSpace());
7201 KernelEnvironmentGV->
getType() == KernelEnvironmentPtr
7202 ? KernelEnvironmentGV
7204 KernelEnvironmentPtr);
7205 Value *KernelLaunchEnvironment = DebugKernelWrapper->
getArg(0);
7207 KernelLaunchEnvironment =
7208 KernelLaunchEnvironment->
getType() == KernelLaunchEnvParamTy
7209 ? KernelLaunchEnvironment
7210 : Builder.CreateAddrSpaceCast(KernelLaunchEnvironment,
7211 KernelLaunchEnvParamTy);
7212 CallInst *ThreadKind = createRuntimeFunctionCall(
7213 Fn, {KernelEnvironment, KernelLaunchEnvironment});
7215 Value *ExecUserCode = Builder.CreateICmpEQ(
7225 auto *UI = Builder.CreateUnreachable();
7231 Builder.SetInsertPoint(WorkerExitBB);
7232 Builder.CreateRetVoid();
7235 Builder.SetInsertPoint(CheckBBTI);
7236 Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);
7239 UI->eraseFromParent();
7246void OpenMPIRBuilder::createTargetDeinit(
const LocationDescription &
Loc,
7247 int32_t TeamsReductionDataSize,
7248 int32_t TeamsReductionBufferLength) {
7249 if (!updateToLocation(
Loc))
7252 Function *Fn = getOrCreateRuntimeFunctionPtr(
7253 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit);
7255 createRuntimeFunctionCall(Fn, {});
7257 if (!TeamsReductionBufferLength || !TeamsReductionDataSize)
7263 const std::string DebugPrefix =
"_debug__";
7265 KernelName = KernelName.
drop_back(DebugPrefix.length());
7266 auto *KernelEnvironmentGV =
7267 M.getNamedGlobal((KernelName +
"_kernel_environment").str());
7268 assert(KernelEnvironmentGV &&
"Expected kernel environment global\n");
7269 auto *KernelEnvironmentInitializer = KernelEnvironmentGV->
getInitializer();
7271 KernelEnvironmentInitializer,
7272 ConstantInt::get(Int32, TeamsReductionDataSize), {0, 7});
7274 NewInitializer, ConstantInt::get(Int32, TeamsReductionBufferLength),
7281 if (
Kernel.hasFnAttribute(Name)) {
7282 int32_t OldLimit =
Kernel.getFnAttributeAsParsedInteger(Name);
7288std::pair<int32_t, int32_t>
7290 int32_t ThreadLimit =
7291 Kernel.getFnAttributeAsParsedInteger(
"omp_target_thread_limit");
7294 const auto &Attr =
Kernel.getFnAttribute(
"amdgpu-flat-work-group-size");
7295 if (!Attr.isValid() || !Attr.isStringAttribute())
7296 return {0, ThreadLimit};
7297 auto [LBStr, UBStr] = Attr.getValueAsString().split(
',');
7300 return {0, ThreadLimit};
7301 UB = ThreadLimit ? std::min(ThreadLimit, UB) : UB;
7307 if (
Kernel.hasFnAttribute(
"nvvm.maxntid")) {
7308 int32_t UB =
Kernel.getFnAttributeAsParsedInteger(
"nvvm.maxntid");
7309 return {0, ThreadLimit ? std::min(ThreadLimit, UB) : UB};
7311 return {0, ThreadLimit};
7314void OpenMPIRBuilder::writeThreadBoundsForKernel(
const Triple &
T,
7317 Kernel.addFnAttr(
"omp_target_thread_limit", std::to_string(UB));
7320 Kernel.addFnAttr(
"amdgpu-flat-work-group-size",
7328std::pair<int32_t, int32_t>
7331 return {0,
Kernel.getFnAttributeAsParsedInteger(
"omp_target_num_teams")};
7335 int32_t LB, int32_t UB) {
7342 Kernel.addFnAttr(
"omp_target_num_teams", std::to_string(LB));
7345void OpenMPIRBuilder::setOutlinedTargetRegionFunctionAttributes(
7347 if (Config.isTargetDevice()) {
7354 else if (
T.isNVPTX())
7356 else if (
T.isSPIRV())
7363 if (Config.isTargetDevice()) {
7364 assert(OutlinedFn &&
"The outlined function must exist if embedded");
7373Constant *OpenMPIRBuilder::createTargetRegionEntryAddr(
Function *OutlinedFn,
7378 assert(!M.getGlobalVariable(EntryFnName,
true) &&
7379 "Named kernel already exists?");
7385Error OpenMPIRBuilder::emitTargetRegionFunction(
7386 TargetRegionEntryInfo &EntryInfo,
7387 FunctionGenCallback &GenerateFunctionCallback,
bool IsOffloadEntry,
7391 OffloadInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);
7393 if (Config.isTargetDevice() || !Config.openMPOffloadMandatory()) {
7397 OutlinedFn = *CBResult;
7399 OutlinedFn =
nullptr;
7405 if (!IsOffloadEntry)
7408 std::string EntryFnIDName =
7409 Config.isTargetDevice()
7410 ? std::string(EntryFnName)
7411 : createPlatformSpecificName({EntryFnName,
"region_id"});
7413 OutlinedFnID = registerTargetRegionFunction(EntryInfo, OutlinedFn,
7414 EntryFnName, EntryFnIDName);
7418Constant *OpenMPIRBuilder::registerTargetRegionFunction(
7419 TargetRegionEntryInfo &EntryInfo,
Function *OutlinedFn,
7422 setOutlinedTargetRegionFunctionAttributes(OutlinedFn);
7423 auto OutlinedFnID = createOutlinedFunctionID(OutlinedFn, EntryFnIDName);
7424 auto EntryAddr = createTargetRegionEntryAddr(OutlinedFn, EntryFnName);
7425 OffloadInfoManager.registerTargetRegionEntryInfo(
7426 EntryInfo, EntryAddr, OutlinedFnID,
7427 OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion);
7428 return OutlinedFnID;
7431OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTargetData(
7432 const LocationDescription &
Loc, InsertPointTy AllocaIP,
7433 InsertPointTy CodeGenIP,
Value *DeviceID,
Value *IfCond,
7434 TargetDataInfo &
Info, GenMapInfoCallbackTy GenMapInfoCB,
7436 function_ref<InsertPointOrErrorTy(InsertPointTy CodeGenIP,
7437 BodyGenTy BodyGenType)>
7440 if (!updateToLocation(
Loc))
7441 return InsertPointTy();
7443 Builder.restoreIP(CodeGenIP);
7445 if (Config.IsTargetDevice.value_or(
false)) {
7447 InsertPointOrErrorTy AfterIP =
7448 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7450 return AfterIP.takeError();
7451 Builder.restoreIP(*AfterIP);
7453 return Builder.saveIP();
7456 bool IsStandAlone = !BodyGenCB;
7457 MapInfosTy *MapInfo;
7461 auto BeginThenGen = [&](InsertPointTy AllocaIP,
7462 InsertPointTy CodeGenIP) ->
Error {
7463 MapInfo = &GenMapInfoCB(Builder.saveIP());
7464 if (
Error Err = emitOffloadingArrays(
7465 AllocaIP, Builder.saveIP(), *MapInfo,
Info, CustomMapperCB,
7466 true, DeviceAddrCB))
7469 TargetDataRTArgs RTArgs;
7470 emitOffloadingArraysArgument(Builder, RTArgs,
Info);
7473 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7478 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7479 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7483 SrcLocInfo, DeviceID,
7484 PointerNum, RTArgs.BasePointersArray,
7485 RTArgs.PointersArray, RTArgs.SizesArray,
7486 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7487 RTArgs.MappersArray};
7490 assert(MapperFunc &&
"MapperFunc missing for standalone target data");
7494 if (
Info.HasNoWait) {
7501 createRuntimeFunctionCall(getOrCreateRuntimeFunctionPtr(*MapperFunc),
7504 if (
Info.HasNoWait) {
7508 emitBlock(OffloadContBlock, CurFn,
true);
7509 Builder.restoreIP(Builder.saveIP());
7514 bool RequiresOuterTargetTask =
Info.HasNoWait;
7515 if (!RequiresOuterTargetTask)
7516 cantFail(TaskBodyCB(
nullptr,
nullptr,
7519 cantFail(emitTargetTask(TaskBodyCB, DeviceID, SrcLocInfo, AllocaIP,
7520 {}, RTArgs,
Info.HasNoWait));
7522 Function *BeginMapperFunc = getOrCreateRuntimeFunctionPtr(
7523 omp::OMPRTL___tgt_target_data_begin_mapper);
7525 createRuntimeFunctionCall(BeginMapperFunc, OffloadingArgs);
7527 for (
auto DeviceMap :
Info.DevicePtrInfoMap) {
7530 Builder.CreateLoad(Builder.getPtrTy(), DeviceMap.second.first);
7531 Builder.CreateStore(LI, DeviceMap.second.second);
7538 InsertPointOrErrorTy AfterIP =
7539 BodyGenCB(Builder.saveIP(), BodyGenTy::Priv);
7541 return AfterIP.takeError();
7542 Builder.restoreIP(*AfterIP);
7550 auto BeginElseGen = [&](InsertPointTy AllocaIP,
7551 InsertPointTy CodeGenIP) ->
Error {
7552 InsertPointOrErrorTy AfterIP =
7553 BodyGenCB(Builder.saveIP(), BodyGenTy::DupNoPriv);
7555 return AfterIP.takeError();
7556 Builder.restoreIP(*AfterIP);
7561 auto EndThenGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7562 TargetDataRTArgs RTArgs;
7563 Info.EmitDebug = !MapInfo->Names.empty();
7564 emitOffloadingArraysArgument(Builder, RTArgs,
Info,
true);
7567 Value *PointerNum = Builder.getInt32(
Info.NumberOfPtrs);
7572 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
7573 SrcLocInfo = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
7576 Value *OffloadingArgs[] = {SrcLocInfo, DeviceID,
7577 PointerNum, RTArgs.BasePointersArray,
7578 RTArgs.PointersArray, RTArgs.SizesArray,
7579 RTArgs.MapTypesArray, RTArgs.MapNamesArray,
7580 RTArgs.MappersArray};
7582 getOrCreateRuntimeFunctionPtr(omp::OMPRTL___tgt_target_data_end_mapper);
7584 createRuntimeFunctionCall(EndMapperFunc, OffloadingArgs);
7590 auto EndElseGen = [&](InsertPointTy AllocaIP, InsertPointTy CodeGenIP) {
7598 return emitIfClause(IfCond, BeginThenGen, BeginElseGen, AllocaIP);
7599 return BeginThenGen(AllocaIP, Builder.saveIP());
7607 InsertPointOrErrorTy AfterIP =
7608 BodyGenCB(Builder.saveIP(), BodyGenTy::NoPriv);
7610 return AfterIP.takeError();
7614 return emitIfClause(IfCond, EndThenGen, EndElseGen, AllocaIP);
7615 return EndThenGen(AllocaIP, Builder.saveIP());
7618 return emitIfClause(IfCond, BeginThenGen, EndElseGen, AllocaIP);
7619 return BeginThenGen(AllocaIP, Builder.saveIP());
7625 return Builder.saveIP();
7629OpenMPIRBuilder::createForStaticInitFunction(
unsigned IVSize,
bool IVSigned,
7630 bool IsGPUDistribute) {
7631 assert((IVSize == 32 || IVSize == 64) &&
7632 "IV size is not compatible with the omp runtime");
7634 if (IsGPUDistribute)
7636 ? (IVSigned ? omp::OMPRTL___kmpc_distribute_static_init_4
7637 : omp::OMPRTL___kmpc_distribute_static_init_4u)
7638 : (IVSigned ?
omp::OMPRTL___kmpc_distribute_static_init_8
7639 :
omp::OMPRTL___kmpc_distribute_static_init_8u);
7641 Name = IVSize == 32 ? (IVSigned ? omp::OMPRTL___kmpc_for_static_init_4
7642 : omp::OMPRTL___kmpc_for_static_init_4u)
7643 : (IVSigned ? omp::OMPRTL___kmpc_for_static_init_8
7644 : omp::OMPRTL___kmpc_for_static_init_8u);
7646 return getOrCreateRuntimeFunction(M, Name);
7649FunctionCallee OpenMPIRBuilder::createDispatchInitFunction(
unsigned IVSize,
7651 assert((IVSize == 32 || IVSize == 64) &&
7652 "IV size is not compatible with the omp runtime");
7654 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_init_4
7655 : omp::OMPRTL___kmpc_dispatch_init_4u)
7656 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_init_8
7657 :
omp::OMPRTL___kmpc_dispatch_init_8u);
7659 return getOrCreateRuntimeFunction(M, Name);
7662FunctionCallee OpenMPIRBuilder::createDispatchNextFunction(
unsigned IVSize,
7664 assert((IVSize == 32 || IVSize == 64) &&
7665 "IV size is not compatible with the omp runtime");
7667 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_next_4
7668 : omp::OMPRTL___kmpc_dispatch_next_4u)
7669 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_next_8
7670 :
omp::OMPRTL___kmpc_dispatch_next_8u);
7672 return getOrCreateRuntimeFunction(M, Name);
7675FunctionCallee OpenMPIRBuilder::createDispatchFiniFunction(
unsigned IVSize,
7677 assert((IVSize == 32 || IVSize == 64) &&
7678 "IV size is not compatible with the omp runtime");
7680 ? (IVSigned ? omp::OMPRTL___kmpc_dispatch_fini_4
7681 : omp::OMPRTL___kmpc_dispatch_fini_4u)
7682 : (IVSigned ?
omp::OMPRTL___kmpc_dispatch_fini_8
7683 :
omp::OMPRTL___kmpc_dispatch_fini_8u);
7685 return getOrCreateRuntimeFunction(M, Name);
7689 return getOrCreateRuntimeFunction(M, omp::OMPRTL___kmpc_dispatch_deinit);
7694 DenseMap<
Value *, std::tuple<Value *, unsigned>> &ValueReplacementMap) {
7702 auto GetUpdatedDIVariable = [&](
DILocalVariable *OldVar,
unsigned arg) {
7706 if (NewVar && (arg == NewVar->
getArg()))
7716 auto UpdateDebugRecord = [&](
auto *DR) {
7719 for (
auto Loc : DR->location_ops()) {
7720 auto Iter = ValueReplacementMap.find(
Loc);
7721 if (Iter != ValueReplacementMap.end()) {
7722 DR->replaceVariableLocationOp(
Loc, std::get<0>(Iter->second));
7723 ArgNo = std::get<1>(Iter->second) + 1;
7727 DR->setVariable(GetUpdatedDIVariable(OldVar, ArgNo));
7734 "Unexpected debug intrinsic");
7736 UpdateDebugRecord(&DVR);
7739 if (OMPBuilder.Config.isTargetDevice()) {
7741 Module *M = Func->getParent();
7744 DB.createQualifiedType(dwarf::DW_TAG_pointer_type,
nullptr);
7746 NewSP,
"dyn_ptr", 1, NewSP->
getFile(), 0,
7747 VoidPtrTy,
false, DINode::DIFlags::FlagArtificial);
7749 DB.insertDeclare(&(*Func->arg_begin()), Var, DB.createExpression(),
Loc,
7762 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
7764 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
7765 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
7767 if (OMPBuilder.Config.isTargetDevice()) {
7775 for (
auto &Arg : Inputs)
7780 for (
auto &Arg : Inputs)
7784 auto BB = Builder.GetInsertBlock();
7796 if (TargetCpuAttr.isStringAttribute())
7797 Func->addFnAttr(TargetCpuAttr);
7799 auto TargetFeaturesAttr = ParentFn->
getFnAttribute(
"target-features");
7800 if (TargetFeaturesAttr.isStringAttribute())
7801 Func->addFnAttr(TargetFeaturesAttr);
7803 if (OMPBuilder.Config.isTargetDevice()) {
7805 OMPBuilder.emitKernelExecutionMode(FuncName, DefaultAttrs.ExecFlags);
7806 OMPBuilder.emitUsed(
"llvm.compiler.used", {ExecMode});
7817 Builder.SetInsertPoint(EntryBB);
7820 if (OMPBuilder.Config.isTargetDevice())
7821 Builder.restoreIP(OMPBuilder.createTargetInit(Builder, DefaultAttrs));
7823 BasicBlock *UserCodeEntryBB = Builder.GetInsertBlock();
7828 if (OMPBuilder.Config.isTargetDevice())
7829 OMPBuilder.ConstantAllocaRaiseCandidates.emplace_back(Func);
7833 splitBB(Builder,
true,
"outlined.body");
7834 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP = CBFunc(
7836 OpenMPIRBuilder::InsertPointTy(OutlinedBodyBB, OutlinedBodyBB->
begin()));
7838 return AfterIP.takeError();
7839 Builder.restoreIP(*AfterIP);
7840 if (OMPBuilder.Config.isTargetDevice())
7841 OMPBuilder.createTargetDeinit(Builder);
7844 Builder.CreateRetVoid();
7848 auto AllocaIP = Builder.saveIP();
7853 const auto &ArgRange =
7854 OMPBuilder.Config.isTargetDevice()
7855 ?
make_range(Func->arg_begin() + 1, Func->arg_end())
7888 if (Instr->getFunction() == Func)
7889 Instr->replaceUsesOfWith(
Input, InputCopy);
7895 for (
auto InArg :
zip(Inputs, ArgRange)) {
7897 Argument &Arg = std::get<1>(InArg);
7898 Value *InputCopy =
nullptr;
7900 llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
7901 ArgAccessorFuncCB(Arg,
Input, InputCopy, AllocaIP, Builder.saveIP());
7903 return AfterIP.takeError();
7904 Builder.restoreIP(*AfterIP);
7905 ValueReplacementMap[
Input] = std::make_tuple(InputCopy, Arg.
getArgNo());
7925 DeferredReplacement.push_back(std::make_pair(
Input, InputCopy));
7932 ReplaceValue(
Input, InputCopy, Func);
7936 for (
auto Deferred : DeferredReplacement)
7937 ReplaceValue(std::get<0>(Deferred), std::get<1>(Deferred), Func);
7940 ValueReplacementMap);
7948 Value *TaskWithPrivates,
7949 Type *TaskWithPrivatesTy) {
7951 Type *TaskTy = OMPIRBuilder.Task;
7954 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 0);
7955 Value *Shareds = TaskT;
7965 if (TaskWithPrivatesTy != TaskTy)
7966 Shareds = Builder.CreateStructGEP(TaskTy, TaskT, 0);
7983 const size_t NumOffloadingArrays,
const int SharedArgsOperandNo) {
7988 assert((!NumOffloadingArrays || PrivatesTy) &&
7989 "PrivatesTy cannot be nullptr when there are offloadingArrays"
7992 Module &M = OMPBuilder.M;
8016 OpenMPIRBuilder::InsertPointTy IP(StaleCI->
getParent(),
8022 Type *TaskPtrTy = OMPBuilder.TaskPtr;
8023 [[maybe_unused]]
Type *TaskTy = OMPBuilder.Task;
8029 ".omp_target_task_proxy_func",
8030 Builder.GetInsertBlock()->getModule());
8031 Value *ThreadId = ProxyFn->getArg(0);
8032 Value *TaskWithPrivates = ProxyFn->getArg(1);
8033 ThreadId->
setName(
"thread.id");
8034 TaskWithPrivates->
setName(
"task");
8036 bool HasShareds = SharedArgsOperandNo > 0;
8037 bool HasOffloadingArrays = NumOffloadingArrays > 0;
8040 Builder.SetInsertPoint(EntryBB);
8046 if (HasOffloadingArrays) {
8047 assert(TaskTy != TaskWithPrivatesTy &&
8048 "If there are offloading arrays to pass to the target"
8049 "TaskTy cannot be the same as TaskWithPrivatesTy");
8052 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskWithPrivates, 1);
8053 for (
unsigned int i = 0; i < NumOffloadingArrays; ++i)
8055 Builder.CreateStructGEP(PrivatesTy, Privates, i));
8059 auto *ArgStructAlloca =
8061 assert(ArgStructAlloca &&
8062 "Unable to find the alloca instruction corresponding to arguments "
8063 "for extracted function");
8067 Builder.CreateAlloca(ArgStructType,
nullptr,
"structArg");
8069 Value *SharedsSize =
8070 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8073 OMPBuilder, Builder, TaskWithPrivates, TaskWithPrivatesTy);
8075 Builder.CreateMemCpy(
8076 NewArgStructAlloca, NewArgStructAlloca->
getAlign(), LoadShared,
8078 KernelLaunchArgs.
push_back(NewArgStructAlloca);
8080 OMPBuilder.createRuntimeFunctionCall(KernelLaunchFunction, KernelLaunchArgs);
8081 Builder.CreateRetVoid();
8087 return GEP->getSourceElementType();
8089 return Alloca->getAllocatedType();
8112 if (OffloadingArraysToPrivatize.
empty())
8113 return OMPIRBuilder.Task;
8116 for (
Value *V : OffloadingArraysToPrivatize) {
8117 assert(V->getType()->isPointerTy() &&
8118 "Expected pointer to array to privatize. Got a non-pointer value "
8121 assert(ArrayTy &&
"ArrayType cannot be nullptr");
8127 "struct.task_with_privates");
8130 OpenMPIRBuilder &OMPBuilder,
IRBuilderBase &Builder,
bool IsOffloadEntry,
8131 TargetRegionEntryInfo &EntryInfo,
8132 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8135 OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc,
8136 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB) {
8138 OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
8141 EntryFnName, Inputs, CBFunc,
8145 return OMPBuilder.emitTargetRegionFunction(
8146 EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
8150OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::emitTargetTask(
8151 TargetTaskBodyCallbackTy TaskBodyCB,
Value *DeviceID,
Value *RTLoc,
8152 OpenMPIRBuilder::InsertPointTy AllocaIP,
8154 const TargetDataRTArgs &RTArgs,
bool HasNoWait) {
8278 splitBB(Builder,
true,
"target.task.body");
8280 splitBB(Builder,
true,
"target.task.alloca");
8282 InsertPointTy TargetTaskAllocaIP(TargetTaskAllocaBB,
8283 TargetTaskAllocaBB->
begin());
8284 InsertPointTy TargetTaskBodyIP(TargetTaskBodyBB, TargetTaskBodyBB->
begin());
8287 OI.EntryBB = TargetTaskAllocaBB;
8288 OI.OuterAllocaBB = AllocaIP.getBlock();
8293 Builder, AllocaIP, ToBeDeleted, TargetTaskAllocaIP,
"global.tid",
false));
8296 Builder.restoreIP(TargetTaskBodyIP);
8297 if (
Error Err = TaskBodyCB(DeviceID, RTLoc, TargetTaskAllocaIP))
8311 emitBlock(OI.ExitBB, Builder.GetInsertBlock()->getParent(),
8315 bool NeedsTargetTask = HasNoWait && DeviceID;
8316 if (NeedsTargetTask) {
8318 {RTArgs.BasePointersArray, RTArgs.PointersArray, RTArgs.MappersArray,
8319 RTArgs.MapNamesArray, RTArgs.MapTypesArray, RTArgs.MapTypesArrayEnd,
8320 RTArgs.SizesArray}) {
8322 OffloadingArraysToPrivatize.
push_back(V);
8323 OI.ExcludeArgsFromAggregate.push_back(V);
8327 OI.PostOutlineCB = [
this, ToBeDeleted, Dependencies, NeedsTargetTask,
8328 DeviceID, OffloadingArraysToPrivatize](
8331 "there must be a single user for the outlined function");
8345 const unsigned int NumStaleCIArgs = StaleCI->
arg_size();
8346 bool HasShareds = NumStaleCIArgs > OffloadingArraysToPrivatize.
size() + 1;
8348 NumStaleCIArgs == (OffloadingArraysToPrivatize.
size() + 2)) &&
8349 "Wrong number of arguments for StaleCI when shareds are present");
8350 int SharedArgOperandNo =
8351 HasShareds ? OffloadingArraysToPrivatize.
size() + 1 : 0;
8357 if (!OffloadingArraysToPrivatize.
empty())
8362 *
this, Builder, StaleCI, PrivatesTy, TaskWithPrivatesTy,
8363 OffloadingArraysToPrivatize.
size(), SharedArgOperandNo);
8365 LLVM_DEBUG(
dbgs() <<
"Proxy task entry function created: " << *ProxyFn
8368 Builder.SetInsertPoint(StaleCI);
8373 getOrCreateSrcLocStr(LocationDescription(Builder), SrcLocStrSize);
8374 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
8383 ? getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_alloc)
8384 : getOrCreateRuntimeFunctionPtr(
8385 OMPRTL___kmpc_omp_target_task_alloc);
8389 Value *ThreadID = getOrCreateThreadID(Ident);
8396 Value *TaskSize = Builder.getInt64(
8397 M.getDataLayout().getTypeStoreSize(TaskWithPrivatesTy));
8402 Value *SharedsSize = Builder.getInt64(0);
8404 auto *ArgStructAlloca =
8406 assert(ArgStructAlloca &&
8407 "Unable to find the alloca instruction corresponding to arguments "
8408 "for extracted function");
8409 auto *ArgStructType =
8411 assert(ArgStructType &&
"Unable to find struct type corresponding to "
8412 "arguments for extracted function");
8414 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ArgStructType));
8423 Value *Flags = Builder.getInt32(0);
8433 TaskSize, SharedsSize,
8436 if (NeedsTargetTask) {
8437 assert(DeviceID &&
"Expected non-empty device ID.");
8441 TaskData = createRuntimeFunctionCall(TaskAllocFn, TaskAllocArgs);
8447 *
this, Builder, TaskData, TaskWithPrivatesTy);
8448 Builder.CreateMemCpy(TaskShareds, Alignment, Shareds, Alignment,
8451 if (!OffloadingArraysToPrivatize.
empty()) {
8453 Builder.CreateStructGEP(TaskWithPrivatesTy, TaskData, 1);
8454 for (
unsigned int i = 0; i < OffloadingArraysToPrivatize.
size(); ++i) {
8455 Value *PtrToPrivatize = OffloadingArraysToPrivatize[i];
8462 "ElementType should match ArrayType");
8465 Value *Dst = Builder.CreateStructGEP(PrivatesTy, Privates, i);
8466 Builder.CreateMemCpy(
8467 Dst, Alignment, PtrToPrivatize, Alignment,
8468 Builder.getInt64(M.getDataLayout().getTypeStoreSize(ElementType)));
8482 if (!NeedsTargetTask) {
8485 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_wait_deps);
8486 createRuntimeFunctionCall(
8489 Builder.getInt32(Dependencies.size()),
8491 ConstantInt::get(Builder.getInt32Ty(), 0),
8497 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_begin_if0);
8499 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_complete_if0);
8500 createRuntimeFunctionCall(TaskBeginFn, {Ident, ThreadID, TaskData});
8501 CallInst *CI = createRuntimeFunctionCall(ProxyFn, {ThreadID, TaskData});
8503 createRuntimeFunctionCall(TaskCompleteFn, {Ident, ThreadID, TaskData});
8504 }
else if (DepArray) {
8509 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task_with_deps);
8510 createRuntimeFunctionCall(
8512 {Ident, ThreadID, TaskData, Builder.getInt32(Dependencies.size()),
8513 DepArray, ConstantInt::get(Builder.getInt32Ty(), 0),
8517 Function *TaskFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_task);
8518 createRuntimeFunctionCall(TaskFn, {Ident, ThreadID, TaskData});
8523 I->eraseFromParent();
8525 addOutlineInfo(std::move(OI));
8528 << *(Builder.GetInsertBlock()) <<
"\n");
8530 << *(Builder.GetInsertBlock()->getParent()->getParent())
8532 return Builder.saveIP();
8535Error OpenMPIRBuilder::emitOffloadingArraysAndArgs(
8536 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8537 TargetDataRTArgs &RTArgs, MapInfosTy &CombinedInfo,
8538 CustomMapperCallbackTy CustomMapperCB,
bool IsNonContiguous,
8541 emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo,
Info,
8542 CustomMapperCB, IsNonContiguous, DeviceAddrCB))
8544 emitOffloadingArraysArgument(Builder, RTArgs,
Info, ForEndCall);
8550 OpenMPIRBuilder::InsertPointTy AllocaIP,
8551 OpenMPIRBuilder::TargetDataInfo &
Info,
8552 const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs,
8553 const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs,
8556 OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB,
8557 OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB,
8559 bool HasNoWait,
Value *DynCGroupMem,
8564 auto &&EmitTargetCallFallbackCB = [&](OpenMPIRBuilder::InsertPointTy IP)
8565 -> OpenMPIRBuilder::InsertPointOrErrorTy {
8566 Builder.restoreIP(IP);
8567 OMPBuilder.createRuntimeFunctionCall(OutlinedFn, Args);
8568 return Builder.saveIP();
8571 bool HasDependencies = Dependencies.
size() > 0;
8572 bool RequiresOuterTargetTask = HasNoWait || HasDependencies;
8574 OpenMPIRBuilder::TargetKernelArgs KArgs;
8581 llvm::OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8589 if (OutlinedFnID && DeviceID)
8590 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8591 EmitTargetCallFallbackCB, KArgs,
8592 DeviceID, RTLoc, TargetTaskAllocaIP);
8600 return EmitTargetCallFallbackCB(OMPBuilder.Builder.saveIP());
8603 OMPBuilder.Builder.restoreIP(AfterIP);
8607 auto &&EmitTargetCallElse =
8608 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8609 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8612 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8613 if (RequiresOuterTargetTask) {
8617 OpenMPIRBuilder::TargetDataRTArgs EmptyRTArgs;
8618 return OMPBuilder.emitTargetTask(TaskBodyCB,
nullptr,
8620 Dependencies, EmptyRTArgs, HasNoWait);
8622 return EmitTargetCallFallbackCB(Builder.saveIP());
8625 Builder.restoreIP(AfterIP);
8629 auto &&EmitTargetCallThen =
8630 [&](OpenMPIRBuilder::InsertPointTy AllocaIP,
8631 OpenMPIRBuilder::InsertPointTy CodeGenIP) ->
Error {
8632 Info.HasNoWait = HasNoWait;
8633 OpenMPIRBuilder::MapInfosTy &MapInfo = GenMapInfoCB(Builder.saveIP());
8634 OpenMPIRBuilder::TargetDataRTArgs RTArgs;
8635 if (
Error Err = OMPBuilder.emitOffloadingArraysAndArgs(
8636 AllocaIP, Builder.saveIP(),
Info, RTArgs, MapInfo, CustomMapperCB,
8643 zip_equal(DefaultAttrs.MaxTeams, RuntimeAttrs.MaxTeams))
8644 NumTeamsC.
push_back(RuntimeVal ? RuntimeVal
8649 auto InitMaxThreadsClause = [&Builder](
Value *
Clause) {
8651 Clause = Builder.CreateIntCast(
Clause, Builder.getInt32Ty(),
8655 auto CombineMaxThreadsClauses = [&Builder](
Value *
Clause,
Value *&Result) {
8658 Result ? Builder.CreateSelect(Builder.CreateICmpULT(Result,
Clause),
8666 Value *MaxThreadsClause =
8667 RuntimeAttrs.TeamsThreadLimit.size() == 1
8668 ? InitMaxThreadsClause(RuntimeAttrs.MaxThreads)
8671 for (
auto [TeamsVal, TargetVal] :
zip_equal(
8672 RuntimeAttrs.TeamsThreadLimit, RuntimeAttrs.TargetThreadLimit)) {
8673 Value *TeamsThreadLimitClause = InitMaxThreadsClause(TeamsVal);
8674 Value *NumThreads = InitMaxThreadsClause(TargetVal);
8676 CombineMaxThreadsClauses(TeamsThreadLimitClause, NumThreads);
8677 CombineMaxThreadsClauses(MaxThreadsClause, NumThreads);
8679 NumThreadsC.
push_back(NumThreads ? NumThreads : Builder.getInt32(0));
8682 unsigned NumTargetItems =
Info.NumberOfPtrs;
8686 Constant *SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
8687 Value *RTLoc = OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize,
8690 Value *TripCount = RuntimeAttrs.LoopTripCount
8691 ? Builder.CreateIntCast(RuntimeAttrs.LoopTripCount,
8692 Builder.getInt64Ty(),
8694 : Builder.getInt64(0);
8698 DynCGroupMem = Builder.getInt32(0);
8700 KArgs = OpenMPIRBuilder::TargetKernelArgs(
8701 NumTargetItems, RTArgs, TripCount, NumTeamsC, NumThreadsC, DynCGroupMem,
8702 HasNoWait, DynCGroupMemFallback);
8706 OpenMPIRBuilder::InsertPointTy AfterIP =
cantFail([&]() {
8709 if (RequiresOuterTargetTask)
8710 return OMPBuilder.emitTargetTask(TaskBodyCB, DeviceID, RTLoc, AllocaIP,
8711 Dependencies, KArgs.RTArgs,
8714 return OMPBuilder.emitKernelLaunch(Builder, OutlinedFnID,
8715 EmitTargetCallFallbackCB, KArgs,
8716 DeviceID, RTLoc, AllocaIP);
8719 Builder.restoreIP(AfterIP);
8726 if (!OutlinedFnID) {
8727 cantFail(EmitTargetCallElse(AllocaIP, Builder.saveIP()));
8733 cantFail(EmitTargetCallThen(AllocaIP, Builder.saveIP()));
8737 cantFail(OMPBuilder.emitIfClause(IfCond, EmitTargetCallThen,
8738 EmitTargetCallElse, AllocaIP));
8741OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createTarget(
8742 const LocationDescription &
Loc,
bool IsOffloadEntry, InsertPointTy AllocaIP,
8743 InsertPointTy CodeGenIP, TargetDataInfo &
Info,
8744 TargetRegionEntryInfo &EntryInfo,
8745 const TargetKernelDefaultAttrs &DefaultAttrs,
8746 const TargetKernelRuntimeAttrs &RuntimeAttrs,
Value *IfCond,
8748 OpenMPIRBuilder::TargetBodyGenCallbackTy CBFunc,
8749 OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy ArgAccessorFuncCB,
8750 CustomMapperCallbackTy CustomMapperCB,
8754 if (!updateToLocation(
Loc))
8755 return InsertPointTy();
8757 Builder.restoreIP(CodeGenIP);
8765 *
this, Builder, IsOffloadEntry, EntryInfo, DefaultAttrs, OutlinedFn,
8766 OutlinedFnID, Inputs, CBFunc, ArgAccessorFuncCB))
8772 if (!Config.isTargetDevice())
8774 IfCond, OutlinedFn, OutlinedFnID, Inputs, GenMapInfoCB,
8775 CustomMapperCB, Dependencies, HasNowait, DynCGroupMem,
8776 DynCGroupMemFallback);
8777 return Builder.saveIP();
8790 return OS.
str().str();
8795 return OpenMPIRBuilder::getNameWithSeparators(Parts, Config.firstSeparator(),
8796 Config.separator());
8801 auto &Elem = *InternalVars.try_emplace(Name,
nullptr).first;
8803 assert(Elem.second->getValueType() == Ty &&
8804 "OMP internal variable has different type than requested");
8817 : M.getTargetTriple().isAMDGPU()
8819 :
DL.getDefaultGlobalsAddressSpace();
8828 const llvm::Align PtrAlign =
DL.getPointerABIAlignment(AddressSpaceVal);
8829 GV->setAlignment(std::max(TypeAlign, PtrAlign));
8836Value *OpenMPIRBuilder::getOMPCriticalRegionLock(
StringRef CriticalName) {
8837 std::string
Prefix =
Twine(
"gomp_critical_user_", CriticalName).
str();
8838 std::string
Name = getNameWithSeparators({
Prefix,
"var"},
".",
".");
8839 return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
8842Value *OpenMPIRBuilder::getSizeInBytes(
Value *BasePtr) {
8847 Builder.CreateGEP(
BasePtr->getType(),
Null, Builder.getInt32(1));
8849 return SizePtrToInt;
8854 std::string VarName) {
8858 M, MaptypesArrayInit->
getType(),
8862 return MaptypesArrayGlobal;
8865void OpenMPIRBuilder::createMapperAllocas(
const LocationDescription &
Loc,
8866 InsertPointTy AllocaIP,
8867 unsigned NumOperands,
8868 struct MapperAllocas &MapperAllocas) {
8869 if (!updateToLocation(
Loc))
8874 Builder.restoreIP(AllocaIP);
8876 ArrI8PtrTy,
nullptr,
".offload_baseptrs");
8880 ArrI64Ty,
nullptr,
".offload_sizes");
8881 updateToLocation(
Loc);
8882 MapperAllocas.ArgsBase = ArgsBase;
8883 MapperAllocas.Args =
Args;
8884 MapperAllocas.ArgSizes = ArgSizes;
8887void OpenMPIRBuilder::emitMapperCall(
const LocationDescription &
Loc,
8890 struct MapperAllocas &MapperAllocas,
8891 int64_t DeviceID,
unsigned NumOperands) {
8892 if (!updateToLocation(
Loc))
8897 Value *ArgsBaseGEP =
8898 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
8899 {Builder.getInt32(0), Builder.getInt32(0)});
8901 Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
8902 {Builder.getInt32(0), Builder.getInt32(0)});
8903 Value *ArgSizesGEP =
8904 Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
8905 {Builder.getInt32(0), Builder.getInt32(0)});
8908 createRuntimeFunctionCall(MapperFunc, {SrcLocInfo, Builder.getInt64(DeviceID),
8909 Builder.getInt32(NumOperands),
8910 ArgsBaseGEP, ArgsGEP, ArgSizesGEP,
8911 MaptypesArg, MapnamesArg, NullPtr});
8914void OpenMPIRBuilder::emitOffloadingArraysArgument(
IRBuilderBase &Builder,
8915 TargetDataRTArgs &RTArgs,
8916 TargetDataInfo &
Info,
8918 assert((!ForEndCall ||
Info.separateBeginEndCalls()) &&
8919 "expected region end call to runtime only when end call is separate");
8921 auto VoidPtrTy = UnqualPtrTy;
8922 auto VoidPtrPtrTy = UnqualPtrTy;
8924 auto Int64PtrTy = UnqualPtrTy;
8926 if (!
Info.NumberOfPtrs) {
8936 RTArgs.BasePointersArray = Builder.CreateConstInBoundsGEP2_32(
8938 Info.RTArgs.BasePointersArray,
8940 RTArgs.PointersArray = Builder.CreateConstInBoundsGEP2_32(
8944 RTArgs.SizesArray = Builder.CreateConstInBoundsGEP2_32(
8947 RTArgs.MapTypesArray = Builder.CreateConstInBoundsGEP2_32(
8949 ForEndCall &&
Info.RTArgs.MapTypesArrayEnd ?
Info.RTArgs.MapTypesArrayEnd
8950 :
Info.RTArgs.MapTypesArray,
8956 if (!
Info.EmitDebug)
8959 RTArgs.MapNamesArray = Builder.CreateConstInBoundsGEP2_32(
8965 if (!
Info.HasMapper)
8968 RTArgs.MappersArray =
8969 Builder.CreatePointerCast(
Info.RTArgs.MappersArray, VoidPtrPtrTy);
8972void OpenMPIRBuilder::emitNonContiguousDescriptor(InsertPointTy AllocaIP,
8973 InsertPointTy CodeGenIP,
8974 MapInfosTy &CombinedInfo,
8975 TargetDataInfo &
Info) {
8976 MapInfosTy::StructNonContiguousInfo &NonContigInfo =
8977 CombinedInfo.NonContigInfo;
8990 "struct.descriptor_dim");
8992 enum { OffsetFD = 0, CountFD, StrideFD };
8996 for (
unsigned I = 0, L = 0,
E = NonContigInfo.Dims.size();
I <
E; ++
I) {
8999 if (NonContigInfo.Dims[
I] == 1)
9001 Builder.restoreIP(AllocaIP);
9004 Builder.CreateAlloca(ArrayTy,
nullptr,
"dims");
9005 Builder.restoreIP(CodeGenIP);
9006 for (
unsigned II = 0, EE = NonContigInfo.Dims[
I];
II < EE; ++
II) {
9007 unsigned RevIdx = EE -
II - 1;
9008 Value *DimsLVal = Builder.CreateInBoundsGEP(
9010 {Builder.getInt64(0), Builder.getInt64(II)});
9012 Value *OffsetLVal = Builder.CreateStructGEP(DimTy, DimsLVal, OffsetFD);
9013 Builder.CreateAlignedStore(
9014 NonContigInfo.Offsets[L][RevIdx], OffsetLVal,
9015 M.getDataLayout().getPrefTypeAlign(OffsetLVal->
getType()));
9017 Value *CountLVal = Builder.CreateStructGEP(DimTy, DimsLVal, CountFD);
9018 Builder.CreateAlignedStore(
9019 NonContigInfo.Counts[L][RevIdx], CountLVal,
9020 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9022 Value *StrideLVal = Builder.CreateStructGEP(DimTy, DimsLVal, StrideFD);
9023 Builder.CreateAlignedStore(
9024 NonContigInfo.Strides[L][RevIdx], StrideLVal,
9025 M.getDataLayout().getPrefTypeAlign(CountLVal->
getType()));
9028 Builder.restoreIP(CodeGenIP);
9029 Value *DAddr = Builder.CreatePointerBitCastOrAddrSpaceCast(
9030 DimsAddr, Builder.getPtrTy());
9031 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9033 Info.RTArgs.PointersArray, 0,
I);
9034 Builder.CreateAlignedStore(
9035 DAddr,
P, M.getDataLayout().getPrefTypeAlign(Builder.getPtrTy()));
9040void OpenMPIRBuilder::emitUDMapperArrayInitOrDel(
9048 M.getContext(), createPlatformSpecificName({
"omp.array",
Prefix}));
9050 Builder.CreateICmpSGT(
Size, Builder.getInt64(1),
"omp.arrayinit.isarray");
9051 Value *DeleteBit = Builder.CreateAnd(
9054 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9055 OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
9060 Value *BaseIsBegin = Builder.CreateICmpNE(
Base, Begin);
9062 Value *PtrAndObjBit = Builder.CreateAnd(
9065 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9066 OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
9067 PtrAndObjBit = Builder.CreateIsNotNull(PtrAndObjBit);
9068 BaseIsBegin = Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
9069 Cond = Builder.CreateOr(IsArray, BaseIsBegin);
9070 DeleteCond = Builder.CreateIsNull(
9072 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
9075 DeleteCond = Builder.CreateIsNotNull(
9077 createPlatformSpecificName({
"omp.array",
Prefix,
".delete"}));
9079 Cond = Builder.CreateAnd(
Cond, DeleteCond);
9080 Builder.CreateCondBr(
Cond, BodyBB, ExitBB);
9082 emitBlock(BodyBB, MapperFn);
9085 Value *ArraySize = Builder.CreateNUWMul(
Size, Builder.getInt64(ElementSize));
9088 Value *MapTypeArg = Builder.CreateAnd(
9091 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9092 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9093 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9094 MapTypeArg = Builder.CreateOr(
9097 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9098 OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));
9102 Value *OffloadingArgs[] = {MapperHandle,
Base, Begin,
9103 ArraySize, MapTypeArg, MapName};
9104 createRuntimeFunctionCall(
9105 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
9113 Type *ElemTy,
StringRef FuncName, CustomMapperCallbackTy CustomMapperCB) {
9129 MapperFn->
addFnAttr(Attribute::NoInline);
9130 MapperFn->
addFnAttr(Attribute::NoUnwind);
9140 auto SavedIP = Builder.saveIP();
9141 Builder.SetInsertPoint(EntryBB);
9153 TypeSize ElementSize = M.getDataLayout().getTypeStoreSize(ElemTy);
9154 Size = Builder.CreateExactUDiv(
Size, Builder.getInt64(ElementSize));
9155 Value *PtrBegin = BeginIn;
9156 Value *PtrEnd = Builder.CreateGEP(ElemTy, PtrBegin,
Size);
9161 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9162 MapType, MapName, ElementSize, HeadBB,
9168 emitBlock(HeadBB, MapperFn);
9173 Builder.CreateICmpEQ(PtrBegin, PtrEnd,
"omp.arraymap.isempty");
9174 Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
9177 emitBlock(BodyBB, MapperFn);
9180 Builder.CreatePHI(PtrBegin->
getType(), 2,
"omp.arraymap.ptrcurrent");
9184 MapInfosOrErrorTy
Info = GenMapInfoCB(Builder.saveIP(), PtrPHI, BeginIn);
9186 return Info.takeError();
9190 Value *OffloadingArgs[] = {MapperHandle};
9191 Value *PreviousSize = createRuntimeFunctionCall(
9192 getOrCreateRuntimeFunction(M, OMPRTL___tgt_mapper_num_components),
9194 Value *ShiftedPreviousSize =
9195 Builder.CreateShl(PreviousSize, Builder.getInt64(getFlagMemberOffset()));
9198 for (
unsigned I = 0;
I <
Info->BasePointers.size(); ++
I) {
9207 Value *OriMapType = Builder.getInt64(
9208 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9210 Value *MemberMapType =
9211 Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);
9225 Value *LeftToFrom = Builder.CreateAnd(
9228 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9229 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9230 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9239 Value *IsAlloc = Builder.CreateIsNull(LeftToFrom);
9240 Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
9242 emitBlock(AllocBB, MapperFn);
9243 Value *AllocMapType = Builder.CreateAnd(
9246 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9247 OpenMPOffloadMappingFlags::OMP_MAP_TO |
9248 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9249 Builder.CreateBr(EndBB);
9250 emitBlock(AllocElseBB, MapperFn);
9251 Value *IsTo = Builder.CreateICmpEQ(
9254 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9255 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9256 Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
9258 emitBlock(ToBB, MapperFn);
9259 Value *ToMapType = Builder.CreateAnd(
9262 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9263 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9264 Builder.CreateBr(EndBB);
9265 emitBlock(ToElseBB, MapperFn);
9266 Value *IsFrom = Builder.CreateICmpEQ(
9269 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9270 OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
9271 Builder.CreateCondBr(IsFrom, FromBB, EndBB);
9273 emitBlock(FromBB, MapperFn);
9274 Value *FromMapType = Builder.CreateAnd(
9277 ~
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9278 OpenMPOffloadMappingFlags::OMP_MAP_TO)));
9280 emitBlock(EndBB, MapperFn);
9283 Builder.CreatePHI(Builder.getInt64Ty(), 4,
"omp.maptype");
9289 Value *OffloadingArgs[] = {MapperHandle, CurBaseArg, CurBeginArg,
9290 CurSizeArg, CurMapType, CurNameArg};
9292 auto ChildMapperFn = CustomMapperCB(
I);
9294 return ChildMapperFn.takeError();
9295 if (*ChildMapperFn) {
9297 createRuntimeFunctionCall(*ChildMapperFn, OffloadingArgs)
9298 ->setDoesNotThrow();
9302 createRuntimeFunctionCall(
9303 getOrCreateRuntimeFunction(M, OMPRTL___tgt_push_mapper_component),
9310 Value *PtrNext = Builder.CreateConstGEP1_32(ElemTy, PtrPHI, 1,
9311 "omp.arraymap.next");
9313 Value *IsDone = Builder.CreateICmpEQ(PtrNext, PtrEnd,
"omp.arraymap.isdone");
9315 Builder.CreateCondBr(IsDone, ExitBB, BodyBB);
9317 emitBlock(ExitBB, MapperFn);
9320 emitUDMapperArrayInitOrDel(MapperFn, MapperHandle, BaseIn, BeginIn,
Size,
9321 MapType, MapName, ElementSize, DoneBB,
9325 emitBlock(DoneBB, MapperFn,
true);
9327 Builder.CreateRetVoid();
9328 Builder.restoreIP(SavedIP);
9332Error OpenMPIRBuilder::emitOffloadingArrays(
9333 InsertPointTy AllocaIP, InsertPointTy CodeGenIP, MapInfosTy &CombinedInfo,
9334 TargetDataInfo &
Info, CustomMapperCallbackTy CustomMapperCB,
9335 bool IsNonContiguous,
9339 Info.clearArrayInfo();
9340 Info.NumberOfPtrs = CombinedInfo.BasePointers.size();
9342 if (
Info.NumberOfPtrs == 0)
9345 Builder.restoreIP(AllocaIP);
9351 Info.RTArgs.BasePointersArray = Builder.CreateAlloca(
9352 PointerArrayType,
nullptr,
".offload_baseptrs");
9354 Info.RTArgs.PointersArray = Builder.CreateAlloca(
9355 PointerArrayType,
nullptr,
".offload_ptrs");
9356 AllocaInst *MappersArray = Builder.CreateAlloca(
9357 PointerArrayType,
nullptr,
".offload_mappers");
9358 Info.RTArgs.MappersArray = MappersArray;
9365 ConstantInt::get(Int64Ty, 0));
9367 for (
unsigned I = 0,
E = CombinedInfo.Sizes.size();
I <
E; ++
I) {
9370 if (IsNonContiguous &&
9371 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9372 CombinedInfo.Types[
I] &
9373 OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
9375 ConstantInt::get(Int64Ty, CombinedInfo.NonContigInfo.Dims[
I]);
9381 RuntimeSizes.set(
I);
9384 if (RuntimeSizes.all()) {
9386 Info.RTArgs.SizesArray = Builder.CreateAlloca(
9387 SizeArrayType,
nullptr,
".offload_sizes");
9392 std::string
Name = createPlatformSpecificName({
"offload_sizes"});
9393 auto *SizesArrayGbl =
9398 if (!RuntimeSizes.any()) {
9399 Info.RTArgs.SizesArray = SizesArrayGbl;
9401 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9402 Align OffloadSizeAlign = M.getDataLayout().getABIIntegerTypeAlignment(64);
9405 SizeArrayType,
nullptr,
".offload_sizes");
9408 Builder.CreateMemCpy(
9409 Buffer, M.getDataLayout().getPrefTypeAlign(Buffer->
getType()),
9410 SizesArrayGbl, OffloadSizeAlign,
9415 Info.RTArgs.SizesArray = Buffer;
9423 for (
auto mapFlag : CombinedInfo.Types)
9425 static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9427 std::string MaptypesName = createPlatformSpecificName({
"offload_maptypes"});
9428 auto *MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9429 Info.RTArgs.MapTypesArray = MapTypesArrayGbl;
9432 if (!CombinedInfo.Names.empty()) {
9433 auto *MapNamesArrayGbl = createOffloadMapnames(
9434 CombinedInfo.Names, createPlatformSpecificName({
"offload_mapnames"}));
9435 Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
9436 Info.EmitDebug =
true;
9438 Info.RTArgs.MapNamesArray =
9440 Info.EmitDebug =
false;
9445 if (
Info.separateBeginEndCalls()) {
9446 bool EndMapTypesDiffer =
false;
9448 if (
Type &
static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>
>(
9449 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
9450 Type &= ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
9451 OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
9452 EndMapTypesDiffer =
true;
9455 if (EndMapTypesDiffer) {
9456 MapTypesArrayGbl = createOffloadMaptypes(Mapping, MaptypesName);
9457 Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
9462 for (
unsigned I = 0;
I <
Info.NumberOfPtrs; ++
I) {
9463 Value *BPVal = CombinedInfo.BasePointers[
I];
9464 Value *BP = Builder.CreateConstInBoundsGEP2_32(
9467 Builder.CreateAlignedStore(BPVal, BP,
9468 M.getDataLayout().getPrefTypeAlign(PtrTy));
9470 if (
Info.requiresDevicePointerInfo()) {
9471 if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Pointer) {
9472 CodeGenIP = Builder.saveIP();
9473 Builder.restoreIP(AllocaIP);
9474 Info.DevicePtrInfoMap[BPVal] = {BP, Builder.CreateAlloca(PtrTy)};
9475 Builder.restoreIP(CodeGenIP);
9477 DeviceAddrCB(
I,
Info.DevicePtrInfoMap[BPVal].second);
9478 }
else if (CombinedInfo.DevicePointers[
I] == DeviceInfoTy::Address) {
9479 Info.DevicePtrInfoMap[BPVal] = {BP, BP};
9481 DeviceAddrCB(
I, BP);
9485 Value *PVal = CombinedInfo.Pointers[
I];
9486 Value *
P = Builder.CreateConstInBoundsGEP2_32(
9490 Builder.CreateAlignedStore(PVal,
P,
9491 M.getDataLayout().getPrefTypeAlign(PtrTy));
9493 if (RuntimeSizes.test(
I)) {
9494 Value *S = Builder.CreateConstInBoundsGEP2_32(
9498 Builder.CreateAlignedStore(Builder.CreateIntCast(CombinedInfo.Sizes[
I],
9501 S, M.getDataLayout().getPrefTypeAlign(PtrTy));
9504 unsigned IndexSize = M.getDataLayout().getIndexSizeInBits(0);
9507 auto CustomMFunc = CustomMapperCB(
I);
9509 return CustomMFunc.takeError();
9511 MFunc = Builder.CreatePointerCast(*CustomMFunc, PtrTy);
9513 Value *MAddr = Builder.CreateInBoundsGEP(
9515 {Builder.getIntN(IndexSize, 0), Builder.getIntN(IndexSize, I)});
9516 Builder.CreateAlignedStore(
9517 MFunc, MAddr, M.getDataLayout().getPrefTypeAlign(MAddr->
getType()));
9520 if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
9521 Info.NumberOfPtrs == 0)
9523 emitNonContiguousDescriptor(AllocaIP, CodeGenIP, CombinedInfo,
Info);
9528 BasicBlock *CurBB = Builder.GetInsertBlock();
9535 Builder.CreateBr(
Target);
9538 Builder.ClearInsertionPoint();
9543 BasicBlock *CurBB = Builder.GetInsertBlock();
9559 Builder.SetInsertPoint(BB);
9562Error OpenMPIRBuilder::emitIfClause(
Value *
Cond, BodyGenCallbackTy ThenGen,
9563 BodyGenCallbackTy ElseGen,
9564 InsertPointTy AllocaIP) {
9568 auto CondConstant = CI->getSExtValue();
9570 return ThenGen(AllocaIP, Builder.saveIP());
9572 return ElseGen(AllocaIP, Builder.saveIP());
9582 Builder.CreateCondBr(
Cond, ThenBlock, ElseBlock);
9584 emitBlock(ThenBlock, CurFn);
9585 if (
Error Err = ThenGen(AllocaIP, Builder.saveIP()))
9587 emitBranch(ContBlock);
9590 emitBlock(ElseBlock, CurFn);
9591 if (
Error Err = ElseGen(AllocaIP, Builder.saveIP()))
9594 emitBranch(ContBlock);
9596 emitBlock(ContBlock, CurFn,
true);
9600bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
9604 "Unexpected Atomic Ordering.");
9661OpenMPIRBuilder::InsertPointTy
9662OpenMPIRBuilder::createAtomicRead(
const LocationDescription &
Loc,
9663 AtomicOpValue &
X, AtomicOpValue &V,
9665 if (!updateToLocation(
Loc))
9668 assert(
X.Var->getType()->isPointerTy() &&
9669 "OMP Atomic expects a pointer to target memory");
9670 Type *XElemTy =
X.ElemTy;
9673 "OMP atomic read expected a scalar type");
9675 Value *XRead =
nullptr;
9679 Builder.CreateLoad(XElemTy,
X.Var,
X.IsVolatile,
"omp.atomic.read");
9685 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9688 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9689 OpenMPIRBuilder::AtomicInfo atomicInfo(
9690 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9691 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9692 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9693 XRead = AtomicLoadRes.first;
9700 Builder.CreateLoad(IntCastTy,
X.Var,
X.IsVolatile,
"omp.atomic.load");
9703 XRead = Builder.CreateBitCast(XLoad, XElemTy,
"atomic.flt.cast");
9705 XRead = Builder.CreateIntToPtr(XLoad, XElemTy,
"atomic.ptr.cast");
9708 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Read);
9709 Builder.CreateStore(XRead, V.Var, V.IsVolatile);
9710 return Builder.saveIP();
9713OpenMPIRBuilder::InsertPointTy
9714OpenMPIRBuilder::createAtomicWrite(
const LocationDescription &
Loc,
9715 AtomicOpValue &
X,
Value *Expr,
9717 if (!updateToLocation(
Loc))
9720 assert(
X.Var->getType()->isPointerTy() &&
9721 "OMP Atomic expects a pointer to target memory");
9722 Type *XElemTy =
X.ElemTy;
9725 "OMP atomic write expected a scalar type");
9728 StoreInst *XSt = Builder.CreateStore(Expr,
X.Var,
X.IsVolatile);
9731 LoadInst *OldVal = Builder.CreateLoad(XElemTy,
X.Var,
"omp.atomic.read");
9733 unsigned LoadSize =
DL.getTypeStoreSize(XElemTy);
9734 OpenMPIRBuilder::AtomicInfo atomicInfo(
9735 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9736 OldVal->
getAlign(),
true , AllocaIP,
X.Var);
9737 atomicInfo.EmitAtomicStoreLibcall(AO, Expr);
9744 Builder.CreateBitCast(Expr, IntCastTy,
"atomic.src.int.cast");
9745 StoreInst *XSt = Builder.CreateStore(ExprCast,
X.Var,
X.IsVolatile);
9749 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Write);
9750 return Builder.saveIP();
9753OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicUpdate(
9754 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9756 AtomicUpdateCallbackTy &UpdateOp,
bool IsXBinopExpr,
9757 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9759 if (!updateToLocation(
Loc))
9763 Type *XTy =
X.Var->getType();
9765 "OMP Atomic expects a pointer to target memory");
9766 Type *XElemTy =
X.ElemTy;
9769 "OMP atomic update expected a scalar type");
9772 "OpenMP atomic does not support LT or GT operations");
9776 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, RMWOp, UpdateOp,
X.IsVolatile,
9777 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
9779 return AtomicResult.takeError();
9780 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Update);
9781 return Builder.saveIP();
9785Value *OpenMPIRBuilder::emitRMWOpAsInstruction(
Value *Src1,
Value *Src2,
9789 return Builder.CreateAdd(Src1, Src2);
9791 return Builder.CreateSub(Src1, Src2);
9793 return Builder.CreateAnd(Src1, Src2);
9795 return Builder.CreateNeg(Builder.CreateAnd(Src1, Src2));
9797 return Builder.CreateOr(Src1, Src2);
9799 return Builder.CreateXor(Src1, Src2);
9824 AtomicUpdateCallbackTy &UpdateOp,
bool VolatileX,
bool IsXBinopExpr,
9825 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9828 bool emitRMWOp =
false;
9836 emitRMWOp = XElemTy;
9839 emitRMWOp = (IsXBinopExpr && XElemTy);
9846 std::pair<Value *, Value *> Res;
9851 if (IsIgnoreDenormalMode)
9852 RMWInst->
setMetadata(
"amdgpu.ignore.denormal.mode",
9854 if (!IsFineGrainedMemory)
9855 RMWInst->
setMetadata(
"amdgpu.no.fine.grained.memory",
9857 if (!IsRemoteMemory)
9861 Res.first = RMWInst;
9866 Res.second = Res.first;
9868 Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
9872 Builder.CreateLoad(XElemTy,
X,
X->getName() +
".atomic.load");
9878 OpenMPIRBuilder::AtomicInfo atomicInfo(
9879 &Builder, XElemTy, LoadSize * 8, LoadSize * 8, OldVal->
getAlign(),
9880 OldVal->
getAlign(),
true , AllocaIP,
X);
9881 auto AtomicLoadRes = atomicInfo.EmitAtomicLoadLibcall(AO);
9882 BasicBlock *CurBB = Builder.GetInsertBlock();
9884 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9888 X->getName() +
".atomic.cont");
9890 Builder.restoreIP(AllocaIP);
9891 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9892 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9893 Builder.SetInsertPoint(ContBB);
9895 PHI->addIncoming(AtomicLoadRes.first, CurBB);
9900 Value *Upd = *CBResult;
9901 Builder.CreateStore(Upd, NewAtomicAddr);
9904 auto Result = atomicInfo.EmitAtomicCompareExchangeLibcall(
9905 AtomicLoadRes.second, NewAtomicAddr, AO, Failure);
9907 PHI->addIncoming(PHILoad, Builder.GetInsertBlock());
9908 Builder.CreateCondBr(
Result.second, ExitBB, ContBB);
9910 Res.first = OldExprVal;
9916 Builder.SetInsertPoint(ExitBB);
9918 Builder.SetInsertPoint(ExitTI);
9924 Builder.CreateLoad(IntCastTy,
X,
X->getName() +
".atomic.load");
9931 BasicBlock *CurBB = Builder.GetInsertBlock();
9933 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
9937 X->getName() +
".atomic.cont");
9939 Builder.restoreIP(AllocaIP);
9940 AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
9941 NewAtomicAddr->
setName(
X->getName() +
"x.new.val");
9942 Builder.SetInsertPoint(ContBB);
9944 PHI->addIncoming(OldVal, CurBB);
9949 OldExprVal = Builder.CreateBitCast(
PHI, XElemTy,
9950 X->getName() +
".atomic.fltCast");
9952 OldExprVal = Builder.CreateIntToPtr(
PHI, XElemTy,
9953 X->getName() +
".atomic.ptrCast");
9960 Value *Upd = *CBResult;
9961 Builder.CreateStore(Upd, NewAtomicAddr);
9962 LoadInst *DesiredVal = Builder.CreateLoad(IntCastTy, NewAtomicAddr);
9967 Result->setVolatile(VolatileX);
9968 Value *PreviousVal = Builder.CreateExtractValue(Result, 0);
9969 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
9970 PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
9971 Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);
9973 Res.first = OldExprVal;
9980 Builder.SetInsertPoint(ExitBB);
9982 Builder.SetInsertPoint(ExitTI);
9989OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::createAtomicCapture(
9990 const LocationDescription &
Loc, InsertPointTy AllocaIP, AtomicOpValue &
X,
9993 bool UpdateExpr,
bool IsPostfixUpdate,
bool IsXBinopExpr,
9994 bool IsIgnoreDenormalMode,
bool IsFineGrainedMemory,
bool IsRemoteMemory) {
9995 if (!updateToLocation(
Loc))
9999 Type *XTy =
X.Var->getType();
10001 "OMP Atomic expects a pointer to target memory");
10002 Type *XElemTy =
X.ElemTy;
10005 "OMP atomic capture expected a scalar type");
10007 "OpenMP atomic does not support LT or GT operations");
10014 AllocaIP,
X.Var,
X.ElemTy, Expr, AO, AtomicOp, UpdateOp,
X.IsVolatile,
10015 IsXBinopExpr, IsIgnoreDenormalMode, IsFineGrainedMemory, IsRemoteMemory);
10018 Value *CapturedVal =
10019 (IsPostfixUpdate ? AtomicResult->first : AtomicResult->second);
10020 Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);
10022 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Capture);
10023 return Builder.saveIP();
10026OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
10027 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
10033 return createAtomicCompare(
Loc,
X, V, R,
E,
D, AO,
Op, IsXBinopExpr,
10034 IsPostfixUpdate, IsFailOnly, Failure);
10037OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCompare(
10038 const LocationDescription &
Loc, AtomicOpValue &
X, AtomicOpValue &V,
10043 if (!updateToLocation(
Loc))
10046 assert(
X.Var->getType()->isPointerTy() &&
10047 "OMP atomic expects a pointer to target memory");
10050 assert(V.Var->getType()->isPointerTy() &&
"v.var must be of pointer type");
10051 assert(V.ElemTy ==
X.ElemTy &&
"x and v must be of same type");
10054 bool IsInteger =
E->getType()->isIntegerTy();
10056 if (
Op == OMPAtomicCompareOp::EQ) {
10061 Value *EBCast = Builder.CreateBitCast(
E, IntCastTy);
10062 Value *DBCast = Builder.CreateBitCast(
D, IntCastTy);
10067 Builder.CreateAtomicCmpXchg(
X.Var,
E,
D,
MaybeAlign(), AO, Failure);
10071 Value *OldValue = Builder.CreateExtractValue(Result, 0);
10073 OldValue = Builder.CreateBitCast(OldValue,
X.ElemTy);
10075 "OldValue and V must be of same type");
10076 if (IsPostfixUpdate) {
10077 Builder.CreateStore(OldValue, V.Var, V.IsVolatile);
10079 Value *SuccessOrFail = Builder.CreateExtractValue(Result, 1);
10090 BasicBlock *CurBB = Builder.GetInsertBlock();
10092 CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
10094 CurBBTI,
X.Var->getName() +
".atomic.exit");
10100 Builder.CreateCondBr(SuccessOrFail, ExitBB, ContBB);
10102 Builder.SetInsertPoint(ContBB);
10103 Builder.CreateStore(OldValue, V.Var);
10104 Builder.CreateBr(ExitBB);
10109 Builder.SetInsertPoint(ExitBB);
10111 Builder.SetInsertPoint(ExitTI);
10114 Value *CapturedValue =
10115 Builder.CreateSelect(SuccessOrFail,
E, OldValue);
10116 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10122 assert(
R.Var->getType()->isPointerTy() &&
10123 "r.var must be of pointer type");
10124 assert(
R.ElemTy->isIntegerTy() &&
"r must be of integral type");
10126 Value *SuccessFailureVal = Builder.CreateExtractValue(Result, 1);
10127 Value *ResultCast =
R.IsSigned
10128 ? Builder.CreateSExt(SuccessFailureVal,
R.ElemTy)
10129 : Builder.CreateZExt(SuccessFailureVal,
R.ElemTy);
10130 Builder.CreateStore(ResultCast,
R.Var,
R.IsVolatile);
10133 assert((
Op == OMPAtomicCompareOp::MAX ||
Op == OMPAtomicCompareOp::MIN) &&
10134 "Op should be either max or min at this point");
10135 assert(!IsFailOnly &&
"IsFailOnly is only valid when the comparison is ==");
10146 if (IsXBinopExpr) {
10173 Builder.CreateAtomicRMW(NewOp,
X.Var,
E,
MaybeAlign(), AO);
10175 Value *CapturedValue =
nullptr;
10176 if (IsPostfixUpdate) {
10177 CapturedValue = OldValue;
10202 Value *NonAtomicCmp = Builder.CreateCmp(Pred, OldValue,
E);
10203 CapturedValue = Builder.CreateSelect(NonAtomicCmp,
E, OldValue);
10205 Builder.CreateStore(CapturedValue, V.Var, V.IsVolatile);
10209 checkAndEmitFlushAfterAtomic(
Loc, AO, AtomicKind::Compare);
10211 return Builder.saveIP();
10214OpenMPIRBuilder::InsertPointOrErrorTy
10215OpenMPIRBuilder::createTeams(
const LocationDescription &
Loc,
10216 BodyGenCallbackTy BodyGenCB,
Value *NumTeamsLower,
10219 if (!updateToLocation(
Loc))
10220 return InsertPointTy();
10223 Constant *SrcLocStr = getOrCreateSrcLocStr(
Loc, SrcLocStrSize);
10224 Value *Ident = getOrCreateIdent(SrcLocStr, SrcLocStrSize);
10229 if (&OuterAllocaBB == Builder.GetInsertBlock()) {
10230 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.entry");
10231 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10251 BasicBlock *ExitBB = splitBB(Builder,
true,
"teams.exit");
10252 BasicBlock *BodyBB = splitBB(Builder,
true,
"teams.body");
10254 splitBB(Builder,
true,
"teams.alloca");
10256 bool SubClausesPresent =
10257 (NumTeamsLower || NumTeamsUpper || ThreadLimit || IfExpr);
10259 if (!Config.isTargetDevice() && SubClausesPresent) {
10260 assert((NumTeamsLower ==
nullptr || NumTeamsUpper !=
nullptr) &&
10261 "if lowerbound is non-null, then upperbound must also be non-null "
10262 "for bounds on num_teams");
10264 if (NumTeamsUpper ==
nullptr)
10265 NumTeamsUpper = Builder.getInt32(0);
10267 if (NumTeamsLower ==
nullptr)
10268 NumTeamsLower = NumTeamsUpper;
10272 "argument to if clause must be an integer value");
10276 IfExpr = Builder.CreateICmpNE(IfExpr,
10277 ConstantInt::get(IfExpr->
getType(), 0));
10278 NumTeamsUpper = Builder.CreateSelect(
10279 IfExpr, NumTeamsUpper, Builder.getInt32(1),
"numTeamsUpper");
10282 NumTeamsLower = Builder.CreateSelect(
10283 IfExpr, NumTeamsLower, Builder.getInt32(1),
"numTeamsLower");
10286 if (ThreadLimit ==
nullptr)
10287 ThreadLimit = Builder.getInt32(0);
10289 Value *ThreadNum = getOrCreateThreadID(Ident);
10290 createRuntimeFunctionCall(
10291 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_teams_51),
10292 {Ident, ThreadNum, NumTeamsLower, NumTeamsUpper, ThreadLimit});
10295 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10296 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10297 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10301 OI.EntryBB = AllocaBB;
10302 OI.ExitBB = ExitBB;
10303 OI.OuterAllocaBB = &OuterAllocaBB;
10307 InsertPointTy OuterAllocaIP(&OuterAllocaBB, OuterAllocaBB.
begin());
10309 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"gid",
true));
10311 Builder, OuterAllocaIP, ToBeDeleted, AllocaIP,
"tid",
true));
10313 auto HostPostOutlineCB = [
this, Ident,
10314 ToBeDeleted](
Function &OutlinedFn)
mutable {
10319 "there must be a single user for the outlined function");
10324 "Outlined function must have two or three arguments only");
10326 bool HasShared = OutlinedFn.
arg_size() == 3;
10334 assert(StaleCI &&
"Error while outlining - no CallInst user found for the "
10335 "outlined function.");
10336 Builder.SetInsertPoint(StaleCI);
10338 Ident, Builder.getInt32(StaleCI->
arg_size() - 2), &OutlinedFn};
10341 createRuntimeFunctionCall(
10342 getOrCreateRuntimeFunctionPtr(
10343 omp::RuntimeFunction::OMPRTL___kmpc_fork_teams),
10347 I->eraseFromParent();
10350 if (!Config.isTargetDevice())
10351 OI.PostOutlineCB = HostPostOutlineCB;
10353 addOutlineInfo(std::move(OI));
10355 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10357 return Builder.saveIP();
10360OpenMPIRBuilder::InsertPointOrErrorTy
10361OpenMPIRBuilder::createDistribute(
const LocationDescription &
Loc,
10362 InsertPointTy OuterAllocaIP,
10363 BodyGenCallbackTy BodyGenCB) {
10364 if (!updateToLocation(
Loc))
10365 return InsertPointTy();
10367 BasicBlock *OuterAllocaBB = OuterAllocaIP.getBlock();
10369 if (OuterAllocaBB == Builder.GetInsertBlock()) {
10371 splitBB(Builder,
true,
"distribute.entry");
10372 Builder.SetInsertPoint(BodyBB, BodyBB->
begin());
10375 splitBB(Builder,
true,
"distribute.exit");
10377 splitBB(Builder,
true,
"distribute.body");
10379 splitBB(Builder,
true,
"distribute.alloca");
10382 InsertPointTy AllocaIP(AllocaBB, AllocaBB->
begin());
10383 InsertPointTy CodeGenIP(BodyBB, BodyBB->
begin());
10384 if (
Error Err = BodyGenCB(AllocaIP, CodeGenIP))
10389 if (Config.isTargetDevice()) {
10391 OI.OuterAllocaBB = OuterAllocaIP.getBlock();
10392 OI.EntryBB = AllocaBB;
10393 OI.ExitBB = ExitBB;
10395 addOutlineInfo(std::move(OI));
10397 Builder.SetInsertPoint(ExitBB, ExitBB->
begin());
10399 return Builder.saveIP();
10404 std::string VarName) {
10410 M, MapNamesArrayInit->
getType(),
10413 return MapNamesArrayGlobal;
10418void OpenMPIRBuilder::initializeTypes(
Module &M) {
10421 unsigned DefaultTargetAS = Config.getDefaultTargetAS();
10422 unsigned ProgramAS = M.getDataLayout().getProgramAddressSpace();
10423#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
10424#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize) \
10425 VarName##Ty = ArrayType::get(ElemTy, ArraySize); \
10426 VarName##PtrTy = PointerType::get(Ctx, DefaultTargetAS);
10427#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...) \
10428 VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg); \
10429 VarName##Ptr = PointerType::get(Ctx, ProgramAS);
10430#define OMP_STRUCT_TYPE(VarName, StructName, Packed, ...) \
10431 T = StructType::getTypeByName(Ctx, StructName); \
10433 T = StructType::create(Ctx, {__VA_ARGS__}, StructName, Packed); \
10435 VarName##Ptr = PointerType::get(Ctx, DefaultTargetAS);
10436#include "llvm/Frontend/OpenMP/OMPKinds.def"
10439void OpenMPIRBuilder::OutlineInfo::collectBlocks(
10447 while (!Worklist.
empty()) {
10451 if (
BlockSet.insert(SuccBB).second)
10460 if (!Config.isGPU()) {
10475 Fn->
addFnAttr(
"uniform-work-group-size",
"true");
10476 Fn->
addFnAttr(Attribute::MustProgress);
10480void OpenMPIRBuilder::createOffloadEntriesAndInfoMetadata(
10481 EmitMetadataErrorReportFunctionTy &ErrorFn) {
10484 if (OffloadInfoManager.empty())
10488 SmallVector<std::pair<
const OffloadEntriesInfoManager::OffloadEntryInfo *,
10489 TargetRegionEntryInfo>,
10491 OrderedEntries(OffloadInfoManager.size());
10494 auto &&GetMDInt = [
this](
unsigned V) {
10501 NamedMDNode *MD = M.getOrInsertNamedMetadata(
"omp_offload.info");
10502 auto &&TargetRegionMetadataEmitter =
10503 [&
C, MD, &OrderedEntries, &GetMDInt, &GetMDString](
10504 const TargetRegionEntryInfo &EntryInfo,
10505 const OffloadEntriesInfoManager::OffloadEntryInfoTargetRegion &
E) {
10518 GetMDInt(
E.getKind()), GetMDInt(EntryInfo.DeviceID),
10519 GetMDInt(EntryInfo.FileID), GetMDString(EntryInfo.ParentName),
10520 GetMDInt(EntryInfo.Line), GetMDInt(EntryInfo.Count),
10521 GetMDInt(
E.getOrder())};
10524 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, EntryInfo);
10530 OffloadInfoManager.actOnTargetRegionEntriesInfo(TargetRegionMetadataEmitter);
10533 auto &&DeviceGlobalVarMetadataEmitter =
10534 [&
C, &OrderedEntries, &GetMDInt, &GetMDString, MD](
10536 const OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar &
E) {
10544 Metadata *
Ops[] = {GetMDInt(
E.getKind()), GetMDString(MangledName),
10545 GetMDInt(
E.getFlags()), GetMDInt(
E.getOrder())};
10548 TargetRegionEntryInfo varInfo(MangledName, 0, 0, 0);
10549 OrderedEntries[
E.getOrder()] = std::make_pair(&
E, varInfo);
10555 OffloadInfoManager.actOnDeviceGlobalVarEntriesInfo(
10556 DeviceGlobalVarMetadataEmitter);
10558 for (
const auto &
E : OrderedEntries) {
10559 assert(
E.first &&
"All ordered entries must exist!");
10560 if (
const auto *CE =
10563 if (!
CE->getID() || !
CE->getAddress()) {
10565 TargetRegionEntryInfo EntryInfo =
E.second;
10566 StringRef FnName = EntryInfo.ParentName;
10567 if (!M.getNamedValue(FnName))
10569 ErrorFn(EMIT_MD_TARGET_REGION_ERROR, EntryInfo);
10572 createOffloadEntry(
CE->getID(),
CE->getAddress(),
10575 }
else if (
const auto *CE =
dyn_cast<
10576 OffloadEntriesInfoManager::OffloadEntryInfoDeviceGlobalVar>(
10578 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags =
10579 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10582 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter:
10583 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo:
10584 if (Config.isTargetDevice() && Config.hasRequiresUnifiedSharedMemory())
10586 if (!
CE->getAddress()) {
10587 ErrorFn(EMIT_MD_DECLARE_TARGET_ERROR,
E.second);
10591 if (
CE->getVarSize() == 0)
10594 case OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink:
10595 assert(((Config.isTargetDevice() && !
CE->getAddress()) ||
10596 (!Config.isTargetDevice() &&
CE->getAddress())) &&
10597 "Declaret target link address is set.");
10598 if (Config.isTargetDevice())
10600 if (!
CE->getAddress()) {
10601 ErrorFn(EMIT_MD_GLOBAL_VAR_LINK_ERROR, TargetRegionEntryInfo());
10613 if ((
GV->hasLocalLinkage() ||
GV->hasHiddenVisibility()) &&
10614 Flags != OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10619 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
10620 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10621 Flags,
CE->getLinkage(),
CE->getVarName());
10623 createOffloadEntry(
CE->getAddress(),
CE->getAddress(),
CE->getVarSize(),
10624 Flags,
CE->getLinkage());
10635 if (Config.hasRequiresFlags() && !Config.isTargetDevice())
10640 OffloadEntriesInfoManager::OMPTargetGlobalRegisterRequires,
10641 Config.getRequiresFlags());
10644void TargetRegionEntryInfo::getTargetRegionEntryFnName(
10646 unsigned FileID,
unsigned Line,
unsigned Count) {
10648 OS << KernelNamePrefix <<
llvm::format(
"%x", DeviceID)
10649 <<
llvm::format(
"_%x_", FileID) << ParentName <<
"_l" << Line;
10651 OS <<
"_" <<
Count;
10654void OffloadEntriesInfoManager::getTargetRegionEntryFnName(
10656 unsigned NewCount = getTargetRegionEntryInfoCount(EntryInfo);
10657 TargetRegionEntryInfo::getTargetRegionEntryFnName(
10658 Name, EntryInfo.ParentName, EntryInfo.DeviceID, EntryInfo.FileID,
10659 EntryInfo.Line, NewCount);
10662TargetRegionEntryInfo
10663OpenMPIRBuilder::getTargetEntryUniqueInfo(FileIdentifierInfoCallbackTy CallBack,
10667 auto FileIDInfo = CallBack();
10671 FileID =
Status->getUniqueID().getFile();
10675 FileID =
hash_value(std::get<0>(FileIDInfo));
10678 return TargetRegionEntryInfo(ParentName,
ID.getDevice(), FileID,
10679 std::get<1>(FileIDInfo));
10682unsigned OpenMPIRBuilder::getFlagMemberOffset() {
10685 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10687 !(Remain & 1); Remain = Remain >> 1)
10693OpenMPIRBuilder::getMemberOfFlag(
unsigned Position) {
10696 << getFlagMemberOffset());
10699void OpenMPIRBuilder::setCorrectMemberOfFlag(
10705 if (
static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10707 static_cast<std::underlying_type_t<omp::OpenMPOffloadMappingFlags>
>(
10714 Flags &=
~omp::OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
10715 Flags |= MemberOfFlag;
10718Constant *OpenMPIRBuilder::getAddrOfDeclareTargetVar(
10719 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10720 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10721 bool IsDeclaration,
bool IsExternallyVisible,
10722 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10723 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10724 std::vector<Triple> TargetTriple,
Type *LlvmPtrTy,
10725 std::function<
Constant *()> GlobalInitializer,
10732 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink ||
10733 ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10735 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10736 Config.hasRequiresUnifiedSharedMemory())) {
10741 if (!IsExternallyVisible)
10742 OS <<
format(
"_%x", EntryInfo.FileID);
10743 OS <<
"_decl_tgt_ref_ptr";
10746 Value *Ptr = M.getNamedValue(PtrName);
10750 Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);
10755 if (!Config.isTargetDevice()) {
10756 if (GlobalInitializer)
10757 GV->setInitializer(GlobalInitializer());
10762 registerTargetGlobalVariable(
10763 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10764 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10765 GlobalInitializer, VariableLinkage, LlvmPtrTy,
cast<Constant>(Ptr));
10774void OpenMPIRBuilder::registerTargetGlobalVariable(
10775 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind CaptureClause,
10776 OffloadEntriesInfoManager::OMPTargetDeviceClauseKind DeviceClause,
10777 bool IsDeclaration,
bool IsExternallyVisible,
10778 TargetRegionEntryInfo EntryInfo,
StringRef MangledName,
10779 std::vector<GlobalVariable *> &GeneratedRefs,
bool OpenMPSIMD,
10780 std::vector<Triple> TargetTriple,
10781 std::function<
Constant *()> GlobalInitializer,
10784 if (DeviceClause != OffloadEntriesInfoManager::OMPTargetDeviceClauseAny ||
10785 (TargetTriple.empty() && !Config.isTargetDevice()))
10788 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
10793 if ((CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo ||
10795 OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter) &&
10796 !Config.hasRequiresUnifiedSharedMemory()) {
10797 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10801 if (!IsDeclaration)
10803 M.getDataLayout().getTypeSizeInBits(LlvmVal->
getValueType()), 8);
10810 if (Config.isTargetDevice() &&
10814 if (!OffloadInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
10817 std::string RefName = createPlatformSpecificName({
VarName,
"ref"});
10819 if (!M.getNamedValue(RefName)) {
10821 getOrCreateInternalVariable(Addr->
getType(), RefName);
10823 GvAddrRef->setConstant(
true);
10825 GvAddrRef->setInitializer(Addr);
10826 GeneratedRefs.push_back(GvAddrRef);
10830 if (CaptureClause == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink)
10831 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
10833 Flags = OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
10835 if (Config.isTargetDevice()) {
10839 Addr = getAddrOfDeclareTargetVar(
10840 CaptureClause, DeviceClause, IsDeclaration, IsExternallyVisible,
10841 EntryInfo, MangledName, GeneratedRefs, OpenMPSIMD, TargetTriple,
10842 LlvmPtrTy, GlobalInitializer, VariableLinkage);
10845 VarSize = M.getDataLayout().getPointerSize();
10849 OffloadInfoManager.registerDeviceGlobalVarEntryInfo(VarName, Addr, VarSize,
10855void OpenMPIRBuilder::loadOffloadInfoMetadata(
Module &M) {
10859 NamedMDNode *MD = M.getNamedMetadata(ompOffloadInfoName);
10864 auto &&GetMDInt = [MN](
unsigned Idx) {
10869 auto &&GetMDString = [MN](
unsigned Idx) {
10871 return V->getString();
10874 switch (GetMDInt(0)) {
10878 case OffloadEntriesInfoManager::OffloadEntryInfo::
10879 OffloadingEntryInfoTargetRegion: {
10880 TargetRegionEntryInfo EntryInfo(GetMDString(3),
10885 OffloadInfoManager.initializeTargetRegionEntryInfo(EntryInfo,
10889 case OffloadEntriesInfoManager::OffloadEntryInfo::
10890 OffloadingEntryInfoDeviceGlobalVar:
10891 OffloadInfoManager.initializeDeviceGlobalVarEntryInfo(
10893 static_cast<OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
>(
10903 if (HostFilePath.
empty())
10907 if (std::error_code Err = Buf.getError()) {
10909 "OpenMPIRBuilder: " +
10917 if (std::error_code Err = M.getError()) {
10919 (
"error parsing host file inside of OpenMPIRBuilder: " + Err.message())
10923 loadOffloadInfoMetadata(*M.get());
10930bool OffloadEntriesInfoManager::empty()
const {
10931 return OffloadEntriesTargetRegion.empty() &&
10932 OffloadEntriesDeviceGlobalVar.empty();
10935unsigned OffloadEntriesInfoManager::getTargetRegionEntryInfoCount(
10936 const TargetRegionEntryInfo &EntryInfo)
const {
10937 auto It = OffloadEntriesTargetRegionCount.find(
10938 getTargetRegionEntryCountKey(EntryInfo));
10939 if (It == OffloadEntriesTargetRegionCount.end())
10944void OffloadEntriesInfoManager::incrementTargetRegionEntryInfoCount(
10945 const TargetRegionEntryInfo &EntryInfo) {
10946 OffloadEntriesTargetRegionCount[getTargetRegionEntryCountKey(EntryInfo)] =
10947 EntryInfo.Count + 1;
10951void OffloadEntriesInfoManager::initializeTargetRegionEntryInfo(
10952 const TargetRegionEntryInfo &EntryInfo,
unsigned Order) {
10953 OffloadEntriesTargetRegion[EntryInfo] =
10954 OffloadEntryInfoTargetRegion(Order,
nullptr,
nullptr,
10955 OMPTargetRegionEntryTargetRegion);
10956 ++OffloadingEntriesNum;
10959void OffloadEntriesInfoManager::registerTargetRegionEntryInfo(
10961 OMPTargetRegionEntryKind Flags) {
10962 assert(EntryInfo.Count == 0 &&
"expected default EntryInfo");
10965 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10969 if (OMPBuilder->Config.isTargetDevice()) {
10971 if (!hasTargetRegionEntryInfo(EntryInfo)) {
10974 auto &Entry = OffloadEntriesTargetRegion[EntryInfo];
10975 Entry.setAddress(Addr);
10977 Entry.setFlags(Flags);
10979 if (Flags == OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion &&
10980 hasTargetRegionEntryInfo(EntryInfo,
true))
10982 assert(!hasTargetRegionEntryInfo(EntryInfo) &&
10983 "Target region entry already registered!");
10984 OffloadEntryInfoTargetRegion Entry(OffloadingEntriesNum, Addr,
ID, Flags);
10985 OffloadEntriesTargetRegion[EntryInfo] = Entry;
10986 ++OffloadingEntriesNum;
10988 incrementTargetRegionEntryInfoCount(EntryInfo);
10991bool OffloadEntriesInfoManager::hasTargetRegionEntryInfo(
10992 TargetRegionEntryInfo EntryInfo,
bool IgnoreAddressId)
const {
10995 EntryInfo.Count = getTargetRegionEntryInfoCount(EntryInfo);
10997 auto It = OffloadEntriesTargetRegion.find(EntryInfo);
10998 if (It == OffloadEntriesTargetRegion.end()) {
11002 if (!IgnoreAddressId && (It->second.getAddress() || It->second.getID()))
11007void OffloadEntriesInfoManager::actOnTargetRegionEntriesInfo(
11008 const OffloadTargetRegionEntryInfoActTy &Action) {
11010 for (
const auto &It : OffloadEntriesTargetRegion) {
11011 Action(It.first, It.second);
11015void OffloadEntriesInfoManager::initializeDeviceGlobalVarEntryInfo(
11016 StringRef Name, OMPTargetGlobalVarEntryKind Flags,
unsigned Order) {
11017 OffloadEntriesDeviceGlobalVar.try_emplace(Name, Order, Flags);
11018 ++OffloadingEntriesNum;
11021void OffloadEntriesInfoManager::registerDeviceGlobalVarEntryInfo(
11024 if (OMPBuilder->Config.isTargetDevice()) {
11026 if (!hasDeviceGlobalVarEntryInfo(VarName))
11028 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
11029 if (Entry.getAddress() && hasDeviceGlobalVarEntryInfo(VarName)) {
11030 if (Entry.getVarSize() == 0) {
11031 Entry.setVarSize(VarSize);
11036 Entry.setVarSize(VarSize);
11038 Entry.setAddress(Addr);
11040 if (hasDeviceGlobalVarEntryInfo(VarName)) {
11041 auto &Entry = OffloadEntriesDeviceGlobalVar[
VarName];
11042 assert(Entry.isValid() && Entry.getFlags() == Flags &&
11043 "Entry not initialized!");
11044 if (Entry.getVarSize() == 0) {
11045 Entry.setVarSize(VarSize);
11050 if (Flags == OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect)
11051 OffloadEntriesDeviceGlobalVar.try_emplace(VarName, OffloadingEntriesNum,
11052 Addr, VarSize, Flags,
Linkage,
11055 OffloadEntriesDeviceGlobalVar.try_emplace(
11056 VarName, OffloadingEntriesNum, Addr, VarSize, Flags,
Linkage,
"");
11057 ++OffloadingEntriesNum;
11061void OffloadEntriesInfoManager::actOnDeviceGlobalVarEntriesInfo(
11062 const OffloadDeviceGlobalVarEntryInfoActTy &Action) {
11064 for (
const auto &
E : OffloadEntriesDeviceGlobalVar)
11065 Action(
E.getKey(),
E.getValue());
11072void CanonicalLoopInfo::collectControlBlocks(
11079 BBs.
append({getPreheader(), Header,
Cond, Latch,
Exit, getAfter()});
11082BasicBlock *CanonicalLoopInfo::getPreheader()
const {
11091void CanonicalLoopInfo::setTripCount(
Value *TripCount) {
11103void CanonicalLoopInfo::mapIndVar(
11113 for (
Use &U : OldIV->
uses()) {
11117 if (
User->getParent() == getCond())
11119 if (
User->getParent() == getLatch())
11125 Value *NewIV = Updater(OldIV);
11128 for (
Use *U : ReplacableUses)
11136void CanonicalLoopInfo::assertOK()
const {
11149 "Preheader must terminate with unconditional branch");
11151 "Preheader must jump to header");
11155 "Header must terminate with unconditional branch");
11156 assert(Header->getSingleSuccessor() ==
Cond &&
11157 "Header must jump to exiting block");
11160 assert(
Cond->getSinglePredecessor() == Header &&
11161 "Exiting block only reachable from header");
11164 "Exiting block must terminate with conditional branch");
11166 "Exiting block must have two successors");
11168 "Exiting block's first successor jump to the body");
11170 "Exiting block's second successor must exit the loop");
11174 "Body only reachable from exiting block");
11179 "Latch must terminate with unconditional branch");
11188 "Exit block must terminate with unconditional branch");
11189 assert(
Exit->getSingleSuccessor() == After &&
11190 "Exit block must jump to after block");
11194 "After block only reachable from exit block");
11198 assert(IndVar &&
"Canonical induction variable not found?");
11200 "Induction variable must be an integer");
11202 "Induction variable must be a PHI in the loop header");
11208 auto *NextIndVar =
cast<PHINode>(IndVar)->getIncomingValue(1);
11215 Value *TripCount = getTripCount();
11216 assert(TripCount &&
"Loop trip count not found?");
11218 "Trip count and induction variable must have the same type");
11222 "Exit condition must be a signed less-than comparison");
11224 "Exit condition must compare the induction variable");
11226 "Exit condition must compare with the trip count");
11230void CanonicalLoopInfo::invalidate() {
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
Expand Atomic instructions
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
iv Induction Variable Users
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
static cl::opt< unsigned > TileSize("fuse-matrix-tile-size", cl::init(4), cl::Hidden, cl::desc("Tile size for matrix instruction fusion using square-shaped tiles."))
uint64_t IntrinsicInst * II
#define OMP_KERNEL_ARG_VERSION
Provides definitions for Target specific Grid Values.
static Value * removeASCastIfPresent(Value *V)
static void createTargetLoopWorkshareCall(OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType, BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg, Value *TripCount, Function &LoopBodyFn, bool NoLoop)
static void redirectTo(BasicBlock *Source, BasicBlock *Target, DebugLoc DL)
Make Source branch to Target.
static FunctionCallee getKmpcDistForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Value * createFakeIntVal(IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy OuterAllocaIP, llvm::SmallVectorImpl< Instruction * > &ToBeDeleted, OpenMPIRBuilder::InsertPointTy InnerAllocaIP, const Twine &Name="", bool AsPtr=true)
static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI, LLVMContext &Ctx, Loop *Loop, LoopInfo &LoopInfo, SmallVector< Metadata * > &LoopMDList)
static FunctionCallee getKmpcForDynamicFiniForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for finalizing the dynamic loop using depending on type.
static Expected< Function * > createOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, StringRef FuncName, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void FixupDebugInfoForOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, Function *Func, DenseMap< Value *, std::tuple< Value *, unsigned > > &ValueReplacementMap)
static OMPScheduleType getOpenMPOrderingScheduleType(OMPScheduleType BaseScheduleType, bool HasOrderedClause)
Adds ordering modifier flags to schedule type.
static OMPScheduleType getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType, bool HasSimdModifier, bool HasMonotonic, bool HasNonmonotonic, bool HasOrderedClause)
Adds monotonicity modifier flags to schedule type.
static void workshareLoopTargetCallback(OpenMPIRBuilder *OMPIRBuilder, CanonicalLoopInfo *CLI, Value *Ident, Function &OutlinedFn, const SmallVector< Instruction *, 4 > &ToBeDeleted, WorksharingLoopType LoopType, bool NoLoop)
static bool isValidWorkshareLoopScheduleType(OMPScheduleType SchedType)
static llvm::CallInst * emitNoUnwindRuntimeCall(IRBuilder<> &Builder, llvm::FunctionCallee Callee, ArrayRef< llvm::Value * > Args, const llvm::Twine &Name)
static Error populateReductionFunction(Function *ReductionFunc, ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, IRBuilder<> &Builder, ArrayRef< bool > IsByRef, bool IsGPU)
static Function * getFreshReductionFunc(Module &M)
static void raiseUserConstantDataAllocasToEntryBlock(IRBuilderBase &Builder, Function *Function)
static FunctionCallee getKmpcForDynamicNextForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for updating the next loop using OpenMP dynamic scheduling depending...
static bool isConflictIP(IRBuilder<>::InsertPoint IP1, IRBuilder<>::InsertPoint IP2)
Return whether IP1 and IP2 are ambiguous, i.e.
static void checkReductionInfos(ArrayRef< OpenMPIRBuilder::ReductionInfo > ReductionInfos, bool IsGPU)
static Type * getOffloadingArrayType(Value *V)
static OMPScheduleType getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasDistScheduleChunks)
Determine which scheduling algorithm to use, determined from schedule clause arguments.
static OMPScheduleType computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks, bool HasSimdModifier, bool HasMonotonicModifier, bool HasNonmonotonicModifier, bool HasOrderedClause, bool HasDistScheduleChunks)
Determine the schedule type using schedule and ordering clause arguments.
static FunctionCallee getKmpcForDynamicInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
Returns an LLVM function to call for initializing loop bounds using OpenMP dynamic scheduling dependi...
static StructType * createTaskWithPrivatesTy(OpenMPIRBuilder &OMPIRBuilder, ArrayRef< Value * > OffloadingArraysToPrivatize)
static cl::opt< double > UnrollThresholdFactor("openmp-ir-builder-unroll-threshold-factor", cl::Hidden, cl::desc("Factor for the unroll threshold to account for code " "simplifications still taking place"), cl::init(1.5))
static int32_t computeHeuristicUnrollFactor(CanonicalLoopInfo *CLI)
Heuristically determine the best-performant unroll factor for CLI.
static Value * emitTaskDependencies(OpenMPIRBuilder &OMPBuilder, const SmallVectorImpl< OpenMPIRBuilder::DependData > &Dependencies)
static Error emitTargetOutlinedFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, bool IsOffloadEntry, TargetRegionEntryInfo &EntryInfo, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, Function *&OutlinedFn, Constant *&OutlinedFnID, SmallVectorImpl< Value * > &Inputs, OpenMPIRBuilder::TargetBodyGenCallbackTy &CBFunc, OpenMPIRBuilder::TargetGenArgAccessorsCallbackTy &ArgAccessorFuncCB)
static void updateNVPTXAttr(Function &Kernel, StringRef Name, int32_t Value, bool Min)
static OpenMPIRBuilder::InsertPointTy getInsertPointAfterInstr(Instruction *I)
static void redirectAllPredecessorsTo(BasicBlock *OldTarget, BasicBlock *NewTarget, DebugLoc DL)
Redirect all edges that branch to OldTarget to NewTarget.
static std::unique_ptr< TargetMachine > createTargetMachine(Function *F, CodeGenOptLevel OptLevel)
Create the TargetMachine object to query the backend for optimization preferences.
static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M, OpenMPIRBuilder &OMPBuilder)
static void addAccessGroupMetadata(BasicBlock *Block, MDNode *AccessGroup, LoopInfo &LI)
Attach llvm.access.group metadata to the memref instructions of Block.
static void addBasicBlockMetadata(BasicBlock *BB, ArrayRef< Metadata * > Properties)
Attach metadata Properties to the basic block described by BB.
static void restoreIPandDebugLoc(llvm::IRBuilderBase &Builder, llvm::IRBuilderBase::InsertPoint IP)
This is wrapper over IRBuilderBase::restoreIP that also restores the current debug location to the la...
static LoadInst * loadSharedDataFromTaskDescriptor(OpenMPIRBuilder &OMPIRBuilder, IRBuilderBase &Builder, Value *TaskWithPrivates, Type *TaskWithPrivatesTy)
Given a task descriptor, TaskWithPrivates, return the pointer to the block of pointers containing sha...
static cl::opt< bool > OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden, cl::desc("Use optimistic attributes describing " "'as-if' properties of runtime calls."), cl::init(false))
static FunctionCallee getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType)
static void emitTargetCall(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, OpenMPIRBuilder::InsertPointTy AllocaIP, OpenMPIRBuilder::TargetDataInfo &Info, const OpenMPIRBuilder::TargetKernelDefaultAttrs &DefaultAttrs, const OpenMPIRBuilder::TargetKernelRuntimeAttrs &RuntimeAttrs, Value *IfCond, Function *OutlinedFn, Constant *OutlinedFnID, SmallVectorImpl< Value * > &Args, OpenMPIRBuilder::GenMapInfoCallbackTy GenMapInfoCB, OpenMPIRBuilder::CustomMapperCallbackTy CustomMapperCB, const SmallVector< llvm::OpenMPIRBuilder::DependData > &Dependencies, bool HasNoWait, Value *DynCGroupMem, OMPDynGroupprivateFallbackType DynCGroupMemFallback)
static const omp::GV & getGridValue(const Triple &T, Function *Kernel)
static Function * emitTargetTaskProxyFunction(OpenMPIRBuilder &OMPBuilder, IRBuilderBase &Builder, CallInst *StaleCI, StructType *PrivatesTy, StructType *TaskWithPrivatesTy, const size_t NumOffloadingArrays, const int SharedArgsOperandNo)
Create an entry point for a target task with the following.
static void addLoopMetadata(CanonicalLoopInfo *Loop, ArrayRef< Metadata * > Properties)
Attach loop metadata Properties to the loop described by Loop.
static void removeUnusedBlocksFromParent(ArrayRef< BasicBlock * > BBs)
Determine which blocks in BBs are reachable from outside and remove the ones that are not reachable f...
static void targetParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, BasicBlock *OuterAllocaBB, Value *Ident, Value *IfCondition, Value *NumThreads, Instruction *PrivTID, AllocaInst *PrivTIDAddr, Value *ThreadID, const SmallVector< Instruction *, 4 > &ToBeDeleted)
static void hostParallelCallback(OpenMPIRBuilder *OMPIRBuilder, Function &OutlinedFn, Function *OuterFn, Value *Ident, Value *IfCondition, Instruction *PrivTID, AllocaInst *PrivTIDAddr, const SmallVector< Instruction *, 4 > &ToBeDeleted)
FunctionAnalysisManager FAM
This file defines the Pass Instrumentation classes that provide instrumentation points into the pass ...
const SmallVectorImpl< MachineOperand > & Cond
static cl::opt< RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode > Mode("regalloc-enable-advisor", cl::Hidden, cl::init(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default), cl::desc("Enable regalloc advisor mode"), cl::values(clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Default, "default", "Default"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Release, "release", "precompiled"), clEnumValN(RegAllocEvictionAdvisorAnalysisLegacy::AdvisorMode::Development, "development", "for training")))
Remove Loads Into Fake Uses
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
unsigned unsigned DefaultVal
std::unordered_set< BasicBlock * > BlockSet
This file implements the SmallBitVector class.
This file defines the SmallSet class.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
static SymbolRef::Type getType(const Symbol *Sym)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Defines the virtual file system interface vfs::FileSystem.
static cl::opt< unsigned > MaxThreads("xcore-max-threads", cl::Optional, cl::desc("Maximum number of threads (for emulation thread-local storage)"), cl::Hidden, cl::value_desc("number"), cl::init(8))
static const uint32_t IV[8]
Class for arbitrary precision integers.
This class represents a conversion between pointers from one address space to another.
an instruction to allocate memory on the stack
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
This class represents an incoming formal argument to a Function.
unsigned getArgNo() const
Return the index of this formal argument in its containing function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
LLVM_ABI AssumptionCache run(Function &F, FunctionAnalysisManager &)
A cache of @llvm.assume calls within a function.
An instruction that atomically checks whether a specified value is in a memory location,...
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
an instruction that atomically reads a memory location, combines it with another value,...
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
This class holds the attributes for a particular argument, parameter, function, or return value.
LLVM_ABI AttributeSet addAttributes(LLVMContext &C, AttributeSet AS) const
Add attributes to the attribute set.
LLVM_ABI AttributeSet addAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add an argument attribute.
LLVM Basic Block Representation.
LLVM_ABI void replaceSuccessorsPhiUsesWith(BasicBlock *Old, BasicBlock *New)
Update all phi nodes in this basic block's successors to refer to basic block New instead of basic bl...
iterator begin()
Instruction iterator methods.
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
reverse_iterator rbegin()
const Instruction & back() const
LLVM_ABI InstListType::const_iterator getFirstNonPHIIt() const
Returns an iterator to the first instruction in this block that is not a PHINode instruction.
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
LLVM_ABI InstListType::const_iterator getFirstNonPHIOrDbg(bool SkipPseudoOp=true) const
Returns a pointer to the first instruction in this block that is not a PHINode or a debug intrinsic,...
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
LLVM_ABI const BasicBlock * getUniqueSuccessor() const
Return the successor of this block if it has a unique successor.
LLVM_ABI const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
const Instruction & front() const
InstListType::reverse_iterator reverse_iterator
LLVM_ABI const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
LLVM_ABI SymbolTableList< BasicBlock >::iterator eraseFromParent()
Unlink 'this' from the containing function and delete it.
InstListType::iterator iterator
Instruction iterators...
LLVM_ABI LLVMContext & getContext() const
Get the context in which this basic block lives.
void moveBefore(BasicBlock *MovePos)
Unlink this basic block from its current function and insert it into the function that MovePos lives ...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
void splice(BasicBlock::iterator ToIt, BasicBlock *FromBB)
Transfer all instructions from FromBB to this basic block at ToIt.
LLVM_ABI const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
LLVM_ABI void removePredecessor(BasicBlock *Pred, bool KeepOneInputPHIs=false)
Update PHI nodes in this BasicBlock before removal of predecessor Pred.
Conditional or Unconditional Branch instruction.
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Value * getArgOperand(unsigned i) const
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
@ ICMP_SLT
signed less than
@ ICMP_SLE
signed less or equal
@ FCMP_OLT
0 1 0 0 True if ordered and less than
@ FCMP_OGT
0 0 1 0 True if ordered and greater than
@ ICMP_UGT
unsigned greater than
@ ICMP_SGT
signed greater than
@ ICMP_ULT
unsigned less than
@ ICMP_ULE
unsigned less or equal
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static LLVM_ABI Constant * getString(LLVMContext &Context, StringRef Initializer, bool AddNull=true)
This method constructs a CDS and initializes it with a text string.
static Constant * get(LLVMContext &Context, ArrayRef< ElementTy > Elts)
get() constructor - Return a constant with array type with an element count and element type matching...
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getTruncOrBitCast(Constant *C, Type *Ty)
static LLVM_ABI Constant * getPointerBitCastOrAddrSpaceCast(Constant *C, Type *Ty)
Create a BitCast or AddrSpaceCast for a pointer type depending on the address space.
static LLVM_ABI Constant * getSizeOf(Type *Ty)
getSizeOf constant expr - computes the (alloc) size of a type (in address-units, not bits) in a targe...
static LLVM_ABI Constant * getAddrSpaceCast(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
This is an important base class in LLVM.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DILocalScope * getScope() const
Get the local scope for this variable.
DINodeArray getAnnotations() const
Subprogram description. Uses SubclassData1.
uint32_t getAlignInBits() const
StringRef getName() const
A parsed version of the target data layout string in and methods for querying it.
TypeSize getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type.
Record of a variable value-assignment, aka a non instruction representation of the dbg....
Analysis pass which computes a DominatorTree.
LLVM_ABI DominatorTree run(Function &F, FunctionAnalysisManager &)
Run the analysis pass over a function and produce a dominator tree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Represents either an error or a value T.
Lightweight error class with error context and mandatory checking.
static ErrorSuccess success()
Create a success value.
Tagged union holding either a T or a Error.
Error takeError()
Take ownership of the stored error.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Type * getParamType(unsigned i) const
Parameter type accessors.
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
void addFnAttr(Attribute::AttrKind Kind)
Add function attributes to this function.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
const BasicBlock & getEntryBlock() const
FunctionType * getFunctionType() const
Returns the FunctionType for me.
void removeFromParent()
removeFromParent - This method unlinks 'this' from the containing module, but does not delete it.
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
AttributeList getAttributes() const
Return the attribute list for this Function.
const Function & getFunction() const
void setAttributes(AttributeList Attrs)
Set the attribute list for this Function.
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
void addParamAttr(unsigned ArgNo, Attribute::AttrKind Kind)
adds the attribute to the list of attributes for the given arg.
Function::iterator insert(Function::iterator Position, BasicBlock *BB)
Insert BB in the basic block list at Position.
Type * getReturnType() const
Returns the type of the ret val.
void setCallingConv(CallingConv::ID CC)
Argument * getArg(unsigned i) const
bool hasMetadata() const
Return true if this value has any metadata attached to it.
LLVM_ABI void addMetadata(unsigned KindID, MDNode &MD)
Add a metadata attachment.
LinkageTypes getLinkage() const
void setLinkage(LinkageTypes LT)
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
PointerType * getType() const
Global values are always pointers.
@ HiddenVisibility
The GV is hidden.
@ ProtectedVisibility
The GV is protected.
void setVisibility(VisibilityTypes V)
LinkageTypes
An enumeration for the kinds of linkage for global values.
@ PrivateLinkage
Like Internal, but omit from symbol table.
@ CommonLinkage
Tentative definitions.
@ InternalLinkage
Rename collisions when linking (static functions).
@ WeakODRLinkage
Same, but only replaced by something equivalent.
@ WeakAnyLinkage
Keep one copy of named function when linking (weak)
@ AppendingLinkage
Special purpose, only applies to global arrays.
@ LinkOnceODRLinkage
Same, but only replaced by something equivalent.
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
LLVM_ABI void setInitializer(Constant *InitVal)
setInitializer - Sets the initializer for this global variable, removing any existing initializer if ...
InsertPoint - A saved insertion point.
BasicBlock * getBlock() const
BasicBlock::iterator getPoint() const
Common base class shared among various IRBuilders.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
LLVM_ABI const DebugLoc & getStableDebugLoc() const
Fetch the debug location for this node, unless this is a debug intrinsic, in which case fetch the deb...
LLVM_ABI void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI BasicBlock * getSuccessor(unsigned Idx) const LLVM_READONLY
Return the specified successor. This instruction must be a terminator.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI void moveBeforePreserving(InstListType::iterator MovePos)
Perform a moveBefore operation, while signalling that the caller intends to preserve the original ord...
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Analysis pass that exposes the LoopInfo for a function.
LLVM_ABI LoopInfo run(Function &F, FunctionAnalysisManager &AM)
ArrayRef< BlockT * > getBlocks() const
Get a list of the basic blocks which make up this loop.
LoopT * getLoopFor(const BlockT *BB) const
Return the inner most loop that BB lives in.
This class represents a loop nest and can be used to query its properties.
Represents a single loop in the control flow graph.
LLVM_ABI MDNode * createCallbackEncoding(unsigned CalleeArgNo, ArrayRef< int > Arguments, bool VarArgsArePassed)
Return metadata describing a callback (see llvm::AbstractCallSite).
LLVM_ABI void replaceOperandWith(unsigned I, Metadata *New)
Replace a specific operand.
static MDTuple * getDistinct(LLVMContext &Context, ArrayRef< Metadata * > MDs)
ArrayRef< MDOperand > operands() const
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
This class implements a map that also provides access to all stored values in a deterministic order.
A Module instance is used to store all the information related to an LLVM module.
const Triple & getTargetTriple() const
Get the target triple which is a string describing the target host.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
iterator_range< op_iterator > operands()
unsigned getOpcode() const
Return the opcode for this Instruction or ConstantExpr.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Pseudo-analysis pass that exposes the PassInstrumentation to pass managers.
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Analysis pass that exposes the ScalarEvolution for a function.
LLVM_ABI ScalarEvolution run(Function &F, FunctionAnalysisManager &AM)
The main scalar evolution driver.
A vector that has set insertion semantics.
bool remove_if(UnaryPredicate P)
Remove items from the set vector based on a predicate function.
This is a 'bitvector' (really, a variable-sized bit array), optimized for the case when the array is ...
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
bool remove_if(UnaryPredicate P)
Remove elements that match the given predicate.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
void append(StringRef RHS)
Append from a StringRef.
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringMap - This is an unconventional map that is specialized for handling keys that are "strings",...
ValueTy lookup(StringRef Key) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
StringRef - Represent a constant reference to a string, i.e.
std::string str() const
str - Get the contents as an std::string.
constexpr bool empty() const
empty - Check if the string is empty.
constexpr size_t size() const
size - Get the string size.
size_t count(char C) const
Return the number of occurrences of C in the string.
bool ends_with(StringRef Suffix) const
Check if this string ends with the given Suffix.
StringRef drop_back(size_t N=1) const
Return a StringRef equal to 'this' but with the last N elements dropped.
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
static LLVM_ABI StructType * create(LLVMContext &Context, StringRef Name)
This creates an identified struct.
Type * getElementType(unsigned N) const
LLVM_ABI void addCase(ConstantInt *OnVal, BasicBlock *Dest)
Add an entry to the switch instruction.
Analysis pass providing the TargetTransformInfo.
LLVM_ABI Result run(const Function &F, FunctionAnalysisManager &)
TargetTransformInfo Result
Analysis pass providing the TargetLibraryInfo.
Target - Wrapper for Target specific information.
TargetMachine * createTargetMachine(const Triple &TT, StringRef CPU, StringRef Features, const TargetOptions &Options, std::optional< Reloc::Model > RM, std::optional< CodeModel::Model > CM=std::nullopt, CodeGenOptLevel OL=CodeGenOptLevel::Default, bool JIT=false) const
createTargetMachine - Create a target specific machine implementation for the specified Triple.
Triple - Helper class for working with autoconf configuration names.
bool isPPC() const
Tests whether the target is PowerPC (32- or 64-bit LE or BE).
bool isX86() const
Tests whether the target is x86 (32- or 64-bit).
ArchType getArch() const
Get the parsed architecture type of this triple.
bool isWasm() const
Tests whether the target is wasm (32- and 64-bit).
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
LLVM_ABI std::string str() const
Return the twine contents as a std::string.
The instances of the Type class are immutable: once they are created, they are never changed.
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
LLVM_ABI unsigned getIntegerBitWidth() const
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
bool isPointerTy() const
True if this is an instance of PointerType.
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
bool isStructTy() const
True if this is an instance of StructType.
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
static LLVM_ABI IntegerType * getInt1Ty(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
bool isVoidTy() const
Return true if this is 'void'.
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
This function has undefined behavior.
Produce an estimate of the unrolled cost of the specified loop.
LLVM_ABI bool canUnroll() const
Whether it is legal to unroll this loop.
uint64_t getRolledLoopSize() const
A Use represents the edge between a Value definition and its users.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
LLVM_ABI bool hasNUses(unsigned N) const
Return true if this Value has exactly N uses.
LLVM_ABI void replaceUsesWithIf(Value *New, llvm::function_ref< bool(Use &U)> ShouldReplace)
Go through the uses list for this definition and make each use point to "V" if the callback ShouldRep...
LLVM_ABI User * getUniqueUndroppableUser()
Return the unique user of this value that cannot be dropped, if there is exactly one (that user can h...
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
A raw_ostream that writes to an SmallVector or SmallString.
The virtual file system interface.
llvm::ErrorOr< std::unique_ptr< llvm::MemoryBuffer > > getBufferForFile(const Twine &Name, int64_t FileSize=-1, bool RequiresNullTerminator=true, bool IsVolatile=false, bool IsText=true)
This is a convenience method that opens a file, gets its content and then closes the file.
virtual llvm::ErrorOr< Status > status(const Twine &Path)=0
Get the status of the entry at Path, if one exists.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr char Attrs[]
Key for Kernel::Metadata::mAttrs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ CE
Windows NT (Windows on ARM)
initializer< Ty > init(const Ty &Val)
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
LLVM_ABI GlobalVariable * emitOffloadingEntry(Module &M, object::OffloadKind Kind, Constant *Addr, StringRef Name, uint64_t Size, uint32_t Flags, uint64_t Data, Constant *AuxAddr=nullptr, StringRef SectionName="llvm_offload_entries")
Create an offloading section struct used to register this global at runtime.
OpenMPOffloadMappingFlags
Values for bit flags used to specify the mapping type for offloading.
@ OMP_MAP_PTR_AND_OBJ
The element being mapped is a pointer-pointee pair; both the pointer and the pointee should be mapped...
@ OMP_MAP_MEMBER_OF
The 16 MSBs of the flags indicate whether the entry is member of some struct/class.
@ OMP_DEVICEID_UNDEF
Device ID if the device was not defined, runtime should get it from environment variables in the spec...
IdentFlag
IDs for all omp runtime library ident_t flag encodings (see their definition in openmp/runtime/src/kmp...
RuntimeFunction
IDs for all omp runtime library (RTL) functions.
constexpr const GV & getAMDGPUGridValues()
static constexpr GV SPIRVGridValues
For generic SPIR-V GPUs.
OMPDynGroupprivateFallbackType
The fallback types for the dyn_groupprivate clause.
@ Null
Return null pointer.
static constexpr GV NVPTXGridValues
For Nvidia GPUs.
Function * Kernel
Summary of a kernel (=entry point for target offloading).
WorksharingLoopType
A type of worksharing loop construct.
OMPAtomicCompareOp
Atomic compare operations. Currently OpenMP only supports ==, >, and <.
NodeAddr< PhiNode * > Phi
NodeAddr< FuncNode * > Func
This is an optimization pass for GlobalISel generic memory operations.
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
detail::zippy< detail::zip_shortest, T, U, Args... > zip(T &&t, U &&u, Args &&...args)
zip iterator for two or more iteratable types.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
hash_code hash_value(const FixedPointSemantics &Val)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI Expected< std::unique_ptr< Module > > parseBitcodeFile(MemoryBufferRef Buffer, LLVMContext &Context, ParserCallbacks Callbacks={})
Read the specified bitcode file, returning the module.
detail::zippy< detail::zip_first, T, U, Args... > zip_equal(T &&t, U &&u, Args &&...args)
zip iterator that assumes that all iteratees have the same length.
LLVM_ABI BasicBlock * CloneBasicBlock(const BasicBlock *BB, ValueToValueMapTy &VMap, const Twine &NameSuffix="", Function *F=nullptr, ClonedCodeInfo *CodeInfo=nullptr, bool MapAtoms=true)
Return a copy of the specified basic block, but without embedding the block into a particular functio...
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
unsigned getPointerAddressSpace(const Type *T)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
auto successors(const MachineBasicBlock *BB)
testing::Matcher< const detail::ErrorHolder & > Failed()
constexpr from_range_t from_range
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE()
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
std::string utostr(uint64_t X, bool isNeg=false)
ErrorOr< T > expectedToErrorOrAndEmitErrors(LLVMContext &Ctx, Expected< T > Val)
bool isa_and_nonnull(const Y &Val)
LLVM_ABI bool convertUsersOfConstantsToInstructions(ArrayRef< Constant * > Consts, Function *RestrictToFunc=nullptr, bool RemoveDeadConstants=true, bool IncludeSelf=false)
Replace constant expressions users of the given constants with instructions.
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
TargetTransformInfo::PeelingPreferences gatherPeelingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, std::optional< bool > UserAllowPeeling, std::optional< bool > UserAllowProfileBasedPeeling, bool UnrollingSpecficValues=false)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
FunctionAddr VTableAddr Count
CodeGenOptLevel
Code generation optimization level.
LLVM_ABI bool computeUnrollCount(Loop *L, const TargetTransformInfo &TTI, DominatorTree &DT, LoopInfo *LI, AssumptionCache *AC, ScalarEvolution &SE, const SmallPtrSetImpl< const Value * > &EphValues, OptimizationRemarkEmitter *ORE, unsigned TripCount, unsigned MaxTripCount, bool MaxOrZero, unsigned TripMultiple, const UnrollCostEstimator &UCE, TargetTransformInfo::UnrollingPreferences &UP, TargetTransformInfo::PeelingPreferences &PP, bool &UseUpperBound)
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ LLVM_MARK_AS_BITMASK_ENUM
constexpr T divideCeil(U Numerator, V Denominator)
Returns the integer ceil(Numerator / Denominator).
void cantFail(Error Err, const char *Msg=nullptr)
Report a fatal error if Err is a failure value.
LLVM_ABI bool MergeBlockIntoPredecessor(BasicBlock *BB, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, MemoryDependenceResults *MemDep=nullptr, bool PredecessorWithTwoSuccessors=false, DominatorTree *DT=nullptr)
Attempts to merge a block into its predecessor, if possible.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI void remapInstructionsInBlocks(ArrayRef< BasicBlock * > Blocks, ValueToValueMapTy &VMap)
Remaps instructions in Blocks using the mapping in VMap.
LLVM_ABI TargetTransformInfo::UnrollingPreferences gatherUnrollingPreferences(Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI, llvm::OptimizationRemarkEmitter &ORE, int OptLevel, std::optional< unsigned > UserThreshold, std::optional< unsigned > UserCount, std::optional< bool > UserAllowPartial, std::optional< bool > UserRuntime, std::optional< bool > UserUpperBound, std::optional< unsigned > UserFullUnrollMaxCount)
Gather the various unrolling parameters based on the defaults, compiler flags, TTI overrides and user...
ValueMap< const Value *, WeakTrackingVH > ValueToValueMapTy
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
auto predecessors(const MachineBasicBlock *BB)
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
LLVM_ABI Constant * ConstantFoldInsertValueInstruction(Constant *Agg, Constant *Val, ArrayRef< unsigned > Idxs)
Attempt to constant fold an insertvalue instruction with the specified operands and indices.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI void DeleteDeadBlocks(ArrayRef< BasicBlock * > BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
bool to_integer(StringRef S, N &Num, unsigned Base=0)
Convert the string S to an integer of the specified type using the radix Base. If Base is 0,...
static auto filterDbgVars(iterator_range< simple_ilist< DbgRecord >::iterator > R)
Filter the DbgRecord range to DbgVariableRecord types only and downcast.
This struct is a compact representation of a valid (non-zero power of two) alignment.
static LLVM_ABI void collectEphemeralValues(const Loop *L, AssumptionCache *AC, SmallPtrSetImpl< const Value * > &EphValues)
Collect a loop's ephemeral values (those used only by an assume or similar intrinsics in the loop).
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
static const Target * lookupTarget(StringRef TripleStr, std::string &Error)
lookupTarget - Lookup a target based on a target triple.
Defines various target-specific GPU grid values that must be consistent between host RTL (plugin),...