62#define DEBUG_TYPE "openmp-ir-builder"
69 cl::desc(
"Use optimistic attributes describing "
70 "'as-if' properties of runtime calls."),
74 "openmp-ir-builder-unroll-threshold-factor",
cl::Hidden,
75 cl::desc(
"Factor for the unroll threshold to account for code "
76 "simplifications still taking place"),
87 if (!IP1.isSet() || !IP2.isSet())
89 return IP1.getBlock() == IP2.getBlock() && IP1.getPoint() == IP2.getPoint();
94 switch (SchedType & ~OMPScheduleType::MonotonicityMask) {
95 case OMPScheduleType::UnorderedStaticChunked:
96 case OMPScheduleType::UnorderedStatic:
97 case OMPScheduleType::UnorderedDynamicChunked:
98 case OMPScheduleType::UnorderedGuidedChunked:
99 case OMPScheduleType::UnorderedRuntime:
100 case OMPScheduleType::UnorderedAuto:
101 case OMPScheduleType::UnorderedTrapezoidal:
102 case OMPScheduleType::UnorderedGreedy:
103 case OMPScheduleType::UnorderedBalanced:
104 case OMPScheduleType::UnorderedGuidedIterativeChunked:
105 case OMPScheduleType::UnorderedGuidedAnalyticalChunked:
106 case OMPScheduleType::UnorderedSteal:
107 case OMPScheduleType::UnorderedStaticBalancedChunked:
108 case OMPScheduleType::UnorderedGuidedSimd:
109 case OMPScheduleType::UnorderedRuntimeSimd:
110 case OMPScheduleType::OrderedStaticChunked:
111 case OMPScheduleType::OrderedStatic:
112 case OMPScheduleType::OrderedDynamicChunked:
113 case OMPScheduleType::OrderedGuidedChunked:
114 case OMPScheduleType::OrderedRuntime:
115 case OMPScheduleType::OrderedAuto:
116 case OMPScheduleType::OrderdTrapezoidal:
117 case OMPScheduleType::NomergeUnorderedStaticChunked:
118 case OMPScheduleType::NomergeUnorderedStatic:
119 case OMPScheduleType::NomergeUnorderedDynamicChunked:
120 case OMPScheduleType::NomergeUnorderedGuidedChunked:
121 case OMPScheduleType::NomergeUnorderedRuntime:
122 case OMPScheduleType::NomergeUnorderedAuto:
123 case OMPScheduleType::NomergeUnorderedTrapezoidal:
124 case OMPScheduleType::NomergeUnorderedGreedy:
125 case OMPScheduleType::NomergeUnorderedBalanced:
126 case OMPScheduleType::NomergeUnorderedGuidedIterativeChunked:
127 case OMPScheduleType::NomergeUnorderedGuidedAnalyticalChunked:
128 case OMPScheduleType::NomergeUnorderedSteal:
129 case OMPScheduleType::NomergeOrderedStaticChunked:
130 case OMPScheduleType::NomergeOrderedStatic:
131 case OMPScheduleType::NomergeOrderedDynamicChunked:
132 case OMPScheduleType::NomergeOrderedGuidedChunked:
133 case OMPScheduleType::NomergeOrderedRuntime:
134 case OMPScheduleType::NomergeOrderedAuto:
135 case OMPScheduleType::NomergeOrderedTrapezoidal:
143 SchedType & OMPScheduleType::MonotonicityMask;
144 if (MonotonicityFlags == OMPScheduleType::MonotonicityMask)
155 if (Features.
count(
"+wavefrontsize64"))
156 return omp::getAMDGPUGridValues<64>();
157 return omp::getAMDGPUGridValues<32>();
168 bool HasSimdModifier) {
170 switch (ClauseKind) {
171 case OMP_SCHEDULE_Default:
172 case OMP_SCHEDULE_Static:
173 return HasChunks ? OMPScheduleType::BaseStaticChunked
174 : OMPScheduleType::BaseStatic;
175 case OMP_SCHEDULE_Dynamic:
176 return OMPScheduleType::BaseDynamicChunked;
177 case OMP_SCHEDULE_Guided:
178 return HasSimdModifier ? OMPScheduleType::BaseGuidedSimd
179 : OMPScheduleType::BaseGuidedChunked;
180 case OMP_SCHEDULE_Auto:
182 case OMP_SCHEDULE_Runtime:
183 return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
184 : OMPScheduleType::BaseRuntime;
192 bool HasOrderedClause) {
193 assert((BaseScheduleType & OMPScheduleType::ModifierMask) ==
194 OMPScheduleType::None &&
195 "Must not have ordering nor monotonicity flags already set");
198 ? OMPScheduleType::ModifierOrdered
199 : OMPScheduleType::ModifierUnordered;
200 OMPScheduleType OrderingScheduleType = BaseScheduleType | OrderingModifier;
203 if (OrderingScheduleType ==
204 (OMPScheduleType::BaseGuidedSimd | OMPScheduleType::ModifierOrdered))
205 return OMPScheduleType::OrderedGuidedChunked;
206 else if (OrderingScheduleType == (OMPScheduleType::BaseRuntimeSimd |
207 OMPScheduleType::ModifierOrdered))
208 return OMPScheduleType::OrderedRuntime;
210 return OrderingScheduleType;
216 bool HasSimdModifier,
bool HasMonotonic,
217 bool HasNonmonotonic,
bool HasOrderedClause) {
218 assert((ScheduleType & OMPScheduleType::MonotonicityMask) ==
219 OMPScheduleType::None &&
220 "Must not have monotonicity flags already set");
221 assert((!HasMonotonic || !HasNonmonotonic) &&
222 "Monotonic and Nonmonotonic are contradicting each other");
225 return ScheduleType | OMPScheduleType::ModifierMonotonic;
226 }
else if (HasNonmonotonic) {
227 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
237 if ((BaseScheduleType == OMPScheduleType::BaseStatic) ||
238 (BaseScheduleType == OMPScheduleType::BaseStaticChunked) ||
244 return ScheduleType | OMPScheduleType::ModifierNonmonotonic;
252 bool HasSimdModifier,
bool HasMonotonicModifier,
253 bool HasNonmonotonicModifier,
bool HasOrderedClause) {
259 OrderedSchedule, HasSimdModifier, HasMonotonicModifier,
260 HasNonmonotonicModifier, HasOrderedClause);
274 auto *Br = cast<BranchInst>(Term);
275 assert(!Br->isConditional() &&
276 "BB's terminator must be an unconditional branch (or degenerate)");
279 Br->setSuccessor(0,
Target);
284 NewBr->setDebugLoc(
DL);
289 assert(New->getFirstInsertionPt() == New->begin() &&
290 "Target BB must not have PHI nodes");
294 New->splice(New->begin(), Old, IP.
getPoint(), Old->
end());
322 New->replaceSuccessorsPhiUsesWith(Old, New);
367 const Twine &
Name =
"",
bool AsPtr =
true) {
375 FakeVal = FakeValAddr;
403enum OpenMPOffloadingRequiresDirFlags {
405 OMP_REQ_UNDEFINED = 0x000,
407 OMP_REQ_NONE = 0x001,
409 OMP_REQ_REVERSE_OFFLOAD = 0x002,
411 OMP_REQ_UNIFIED_ADDRESS = 0x004,
413 OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
415 OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
422 : RequiresFlags(OMP_REQ_UNDEFINED) {}
425 bool IsTargetDevice,
bool IsGPU,
bool OpenMPOffloadMandatory,
426 bool HasRequiresReverseOffload,
bool HasRequiresUnifiedAddress,
427 bool HasRequiresUnifiedSharedMemory,
bool HasRequiresDynamicAllocators)
428 : IsTargetDevice(IsTargetDevice), IsGPU(IsGPU),
429 OpenMPOffloadMandatory(OpenMPOffloadMandatory),
430 RequiresFlags(OMP_REQ_UNDEFINED) {
431 if (HasRequiresReverseOffload)
432 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
433 if (HasRequiresUnifiedAddress)
434 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
435 if (HasRequiresUnifiedSharedMemory)
436 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
437 if (HasRequiresDynamicAllocators)
438 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
442 return RequiresFlags & OMP_REQ_REVERSE_OFFLOAD;
446 return RequiresFlags & OMP_REQ_UNIFIED_ADDRESS;
450 return RequiresFlags & OMP_REQ_UNIFIED_SHARED_MEMORY;
454 return RequiresFlags & OMP_REQ_DYNAMIC_ALLOCATORS;
459 :
static_cast<int64_t
>(OMP_REQ_NONE);
464 RequiresFlags |= OMP_REQ_REVERSE_OFFLOAD;
466 RequiresFlags &= ~OMP_REQ_REVERSE_OFFLOAD;
471 RequiresFlags |= OMP_REQ_UNIFIED_ADDRESS;
473 RequiresFlags &= ~OMP_REQ_UNIFIED_ADDRESS;
478 RequiresFlags |= OMP_REQ_UNIFIED_SHARED_MEMORY;
480 RequiresFlags &= ~OMP_REQ_UNIFIED_SHARED_MEMORY;
485 RequiresFlags |= OMP_REQ_DYNAMIC_ALLOCATORS;
487 RequiresFlags &= ~OMP_REQ_DYNAMIC_ALLOCATORS;
500 constexpr const size_t MaxDim = 3;
508 for (
unsigned I = 1;
I < std::min(KernelArgs.
NumTeams.size(), MaxDim); ++
I)
511 Value *NumThreads3D =
512 Builder.CreateInsertValue(ZeroArray, KernelArgs.NumThreads, {0});
516 KernelArgs.RTArgs.BasePointersArray,
517 KernelArgs.RTArgs.PointersArray,
518 KernelArgs.RTArgs.SizesArray,
519 KernelArgs.RTArgs.MapTypesArray,
520 KernelArgs.RTArgs.MapNamesArray,
521 KernelArgs.RTArgs.MappersArray,
522 KernelArgs.NumIterations,
526 KernelArgs.DynCGGroupMem};
534 auto FnAttrs = Attrs.getFnAttrs();
535 auto RetAttrs = Attrs.getRetAttrs();
537 for (
size_t ArgNo = 0; ArgNo < Fn.
arg_size(); ++ArgNo)
542 bool Param =
true) ->
void {
543 bool HasSignExt = AS.hasAttribute(Attribute::SExt);
544 bool HasZeroExt = AS.hasAttribute(Attribute::ZExt);
545 if (HasSignExt || HasZeroExt) {
546 assert(AS.getNumAttributes() == 1 &&
547 "Currently not handling extension attr combined with others.");
549 if (
auto AK = TargetLibraryInfo::getExtAttrForI32Param(
T, HasSignExt))
552 TargetLibraryInfo::getExtAttrForI32Return(
T, HasSignExt))
559#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
560#include "llvm/Frontend/OpenMP/OMPKinds.def"
564#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets) \
566 FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet); \
567 addAttrSet(RetAttrs, RetAttrSet, false); \
568 for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo) \
569 addAttrSet(ArgAttrs[ArgNo], ArgAttrSets[ArgNo]); \
570 Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs)); \
572#include "llvm/Frontend/OpenMP/OMPKinds.def"
586#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...) \
588 FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__}, \
590 Fn = M.getFunction(Str); \
592#include "llvm/Frontend/OpenMP/OMPKinds.def"
598#define OMP_RTL(Enum, Str, ...) \
600 Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M); \
602#include "llvm/Frontend/OpenMP/OMPKinds.def"
606 if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
616 LLVMContext::MD_callback,
618 2, {-1, -1},
true)}));
631 assert(Fn &&
"Failed to create OpenMP runtime function");
638 auto *Fn = dyn_cast<llvm::Function>(RTLFn.
getCallee());
639 assert(Fn &&
"Failed to create OpenMP runtime function pointer");
654 for (
auto Inst =
Block->getReverseIterator()->begin();
655 Inst !=
Block->getReverseIterator()->end();) {
656 if (
auto *
AllocaInst = dyn_cast_if_present<llvm::AllocaInst>(Inst)) {
680 ParallelRegionBlockSet.
clear();
682 OI.collectBlocks(ParallelRegionBlockSet,
Blocks);
701 ".omp_par", ArgsInZeroAddressSpace);
705 <<
" Exit: " << OI.ExitBB->getName() <<
"\n");
707 "Expected OpenMP outlining to be possible!");
709 for (
auto *V : OI.ExcludeArgsFromAggregate)
716 if (TargetCpuAttr.isStringAttribute())
719 auto TargetFeaturesAttr = OuterFn->
getFnAttribute(
"target-features");
720 if (TargetFeaturesAttr.isStringAttribute())
721 OutlinedFn->
addFnAttr(TargetFeaturesAttr);
724 LLVM_DEBUG(
dbgs() <<
" Outlined function: " << *OutlinedFn <<
"\n");
726 "OpenMP outlined functions should not return a value!");
738 assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
745 "Expected instructions to add in the outlined region entry");
752 if (
I.isTerminator())
755 I.moveBeforePreserving(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
758 OI.EntryBB->moveBefore(&ArtificialEntry);
765 if (OI.PostOutlineCB)
766 OI.PostOutlineCB(*OutlinedFn);
797 errs() <<
"Error of kind: " << Kind
798 <<
" when emitting offload entries and metadata during "
799 "OMPIRBuilder finalization \n";
806 std::vector<WeakTrackingVH> LLVMCompilerUsed = {
808 emitUsed(
"llvm.compiler.used", LLVMCompilerUsed);
830 unsigned Reserve2Flags) {
832 LocFlags |= OMP_IDENT_FLAG_KMPC;
840 ConstantInt::get(
Int32, Reserve2Flags),
841 ConstantInt::get(
Int32, SrcLocStrSize), SrcLocStr};
848 if (
GV.getValueType() == OpenMPIRBuilder::Ident &&
GV.hasInitializer())
849 if (
GV.getInitializer() == Initializer)
854 M, OpenMPIRBuilder::Ident,
869 SrcLocStrSize = LocStr.
size();
878 if (
GV.isConstant() &&
GV.hasInitializer() &&
879 GV.getInitializer() == Initializer)
890 unsigned Line,
unsigned Column,
896 Buffer.
append(FunctionName);
898 Buffer.
append(std::to_string(Line));
900 Buffer.
append(std::to_string(Column));
908 StringRef UnknownLoc =
";unknown;unknown;0;0;;";
919 if (
DIFile *DIF = DIL->getFile())
920 if (std::optional<StringRef> Source = DIF->getSource())
926 DIL->getColumn(), SrcLocStrSize);
938 "omp_global_thread_num");
943 bool ForceSimpleCall,
bool CheckCancelFlag) {
953 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
956 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
959 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
962 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
965 BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
978 bool UseCancelBarrier =
983 UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
984 : OMPRTL___kmpc_barrier),
987 if (UseCancelBarrier && CheckCancelFlag)
996 omp::Directive CanceledDirective) {
1008 Value *CancelKind =
nullptr;
1009 switch (CanceledDirective) {
1010#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value) \
1011 case DirectiveEnum: \
1012 CancelKind = Builder.getInt32(Value); \
1014#include "llvm/Frontend/OpenMP/OMPKinds.def"
1025 auto ExitCB = [
this, CanceledDirective, Loc](
InsertPointTy IP) {
1026 if (CanceledDirective == OMPD_parallel) {
1030 omp::Directive::OMPD_unknown,
false,
1040 UI->eraseFromParent();
1053 auto *KernelArgsPtr =
1066 NumThreads, HostPtr, KernelArgsPtr};
1094 assert(OutlinedFnID &&
"Invalid outlined function ID!");
1098 Value *Return =
nullptr;
1118 Args.NumTeams.front(), Args.NumThreads,
1119 OutlinedFnID, ArgsVector));
1132 emitBlock(OffloadContBlock, CurFn,
true);
1137 omp::Directive CanceledDirective,
1140 "Unexpected cancellation!");
1190 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1193 "Expected at least tid and bounded tid as arguments");
1194 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1197 assert(CI &&
"Expected call instruction to outlined function");
1198 CI->
getParent()->setName(
"omp_parallel");
1201 Type *PtrTy = OMPIRBuilder->VoidPtr;
1205 OpenMPIRBuilder ::InsertPointTy CurrentIP = Builder.
saveIP();
1209 Value *Args = ArgsAlloca;
1217 for (
unsigned Idx = 0;
Idx < NumCapturedVars;
Idx++) {
1229 Value *Parallel51CallArgs[] = {
1233 NumThreads ? NumThreads : Builder.
getInt32(-1),
1236 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr),
1239 Builder.
getInt64(NumCapturedVars)};
1244 Builder.
CreateCall(RTLFn, Parallel51CallArgs);
1259 I->eraseFromParent();
1281 if (
auto *
F = dyn_cast<Function>(RTLFn.
getCallee())) {
1282 if (!
F->hasMetadata(LLVMContext::MD_callback)) {
1290 F->addMetadata(LLVMContext::MD_callback,
1299 OutlinedFn.
addFnAttr(Attribute::NoUnwind);
1302 "Expected at least tid and bounded tid as arguments");
1303 unsigned NumCapturedVars = OutlinedFn.
arg_size() - 2;
1306 CI->
getParent()->setName(
"omp_parallel");
1310 Value *ForkCallArgs[] = {
1311 Ident, Builder.
getInt32(NumCapturedVars),
1312 Builder.
CreateBitCast(&OutlinedFn, OMPIRBuilder->ParallelTaskPtr)};
1315 RealArgs.
append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
1324 auto PtrTy = OMPIRBuilder->VoidPtr;
1325 if (IfCondition && NumCapturedVars == 0) {
1329 if (IfCondition && RealArgs.
back()->getType() != PtrTy)
1347 I->eraseFromParent();
1355 omp::ProcBindKind ProcBind,
bool IsCancellable) {
1382 if (ProcBind != OMP_PROC_BIND_default) {
1386 ConstantInt::get(
Int32,
unsigned(ProcBind),
true)};
1414 TIDAddrAlloca, PointerType ::get(
M.
getContext(), 0),
"tid.addr.ascast");
1419 "zero.addr.ascast");
1443 if (IP.getBlock()->end() == IP.getPoint()) {
1449 assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
1450 IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
1451 "Unexpected insertion point for finalization call!");
1487 LLVM_DEBUG(
dbgs() <<
"Before body codegen: " << *OuterFn <<
"\n");
1490 assert(BodyGenCB &&
"Expected body generation callback!");
1492 BodyGenCB(InnerAllocaIP, CodeGenIP);
1494 LLVM_DEBUG(
dbgs() <<
"After body codegen: " << *OuterFn <<
"\n");
1500 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1502 IfCondition, NumThreads, PrivTID, PrivTIDAddr,
1503 ThreadID, ToBeDeletedVec);
1508 std::move(ToBeDeleted)](
Function &OutlinedFn) {
1510 PrivTID, PrivTIDAddr, ToBeDeletedVec);
1527 PRegOutlinedExitBB->
setName(
"omp.par.outlined.exit");
1528 Blocks.push_back(PRegOutlinedExitBB);
1539 ".omp_par", ArgsInZeroAddressSpace);
1544 Extractor.
findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
1547 LLVM_DEBUG(
dbgs() <<
"Before privatization: " << *OuterFn <<
"\n");
1552 auto PrivHelper = [&](
Value &V) {
1553 if (&V == TIDAddr || &V == ZeroAddr) {
1559 for (
Use &U : V.uses())
1560 if (
auto *UserI = dyn_cast<Instruction>(U.getUser()))
1561 if (ParallelRegionBlockSet.
count(UserI->getParent()))
1571 if (!V.getType()->isPointerTy()) {
1590 Value *ReplacementValue =
nullptr;
1591 CallInst *CI = dyn_cast<CallInst>(&V);
1593 ReplacementValue = PrivTID;
1596 PrivCB(InnerAllocaIP,
Builder.
saveIP(), V, *Inner, ReplacementValue));
1601 assert(ReplacementValue &&
1602 "Expected copy/create callback to set replacement value!");
1603 if (ReplacementValue == &V)
1608 UPtr->set(ReplacementValue);
1625 for (
Value *Input : Inputs) {
1630 for (
Value *Output : Outputs)
1634 "OpenMP outlining should not produce live-out values!");
1636 LLVM_DEBUG(
dbgs() <<
"After privatization: " << *OuterFn <<
"\n");
1639 dbgs() <<
" PBR: " << BB->getName() <<
"\n";
1647 assert(FiniInfo.DK == OMPD_parallel &&
1648 "Unexpected finalization stack state!");
1658 InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
1659 UI->eraseFromParent();
1725 if (Dependencies.
empty())
1745 Type *DependInfo = OMPBuilder.DependInfo;
1748 Value *DepArray =
nullptr;
1754 DepArray = Builder.
CreateAlloca(DepArrayTy,
nullptr,
".dep.arr.addr");
1756 for (
const auto &[DepIdx, Dep] :
enumerate(Dependencies)) {
1762 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1767 DependInfo,
Base,
static_cast<unsigned int>(RTLDependInfoFields::Len));
1769 Builder.
getInt64(M.getDataLayout().getTypeStoreSize(Dep.DepValueType)),
1774 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1777 static_cast<unsigned int>(Dep.DepKind)),
1820 BodyGenCB(TaskAllocaIP, TaskBodyIP);
1830 Builder, AllocaIP, ToBeDeleted, TaskAllocaIP,
"global.tid",
false));
1832 OI.
PostOutlineCB = [
this, Ident, Tied, Final, IfCondition, Dependencies,
1833 TaskAllocaBB, ToBeDeleted](
Function &OutlinedFn)
mutable {
1835 assert(OutlinedFn.getNumUses() == 1 &&
1836 "there must be a single user for the outlined function");
1837 CallInst *StaleCI = cast<CallInst>(OutlinedFn.user_back());
1841 bool HasShareds = StaleCI->
arg_size() > 1;
1880 assert(ArgStructAlloca &&
1881 "Unable to find the alloca instruction corresponding to arguments "
1882 "for extracted function");
1885 assert(ArgStructType &&
"Unable to find struct type corresponding to "
1886 "arguments for extracted function");
1894 TaskAllocFn, {Ident, ThreadID,
Flags,
1895 TaskSize, SharedsSize,
1907 Value *DepArray =
nullptr;
1908 if (Dependencies.
size()) {
1923 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr));
1930 static_cast<unsigned int>(RTLDependInfoFields::Len));
1937 static_cast<unsigned int>(RTLDependInfoFields::Flags));
1940 static_cast<unsigned int>(Dep.DepKind)),
1971 Instruction *ThenTI = IfTerminator, *ElseTI =
nullptr;
1977 if (Dependencies.
size()) {
2001 if (Dependencies.
size()) {
2022 Shareds, [Shareds](
Use &U) {
return U.getUser() != Shareds; });
2074 if (IP.getBlock()->end() != IP.getPoint())
2085 auto *CaseBB = IP.getBlock()->getSinglePredecessor();
2086 auto *CondBB = CaseBB->getSinglePredecessor()->getSinglePredecessor();
2087 auto *ExitBB = CondBB->getTerminator()->getSuccessor(1);
2118 unsigned CaseNumber = 0;
2119 for (
auto SectionCB : SectionCBs) {
2135 Value *LB = ConstantInt::get(I32Ty, 0);
2136 Value *UB = ConstantInt::get(I32Ty, SectionCBs.
size());
2137 Value *ST = ConstantInt::get(I32Ty, 1);
2139 Loc, LoopBodyGenCB, LB, UB, ST,
true,
false, AllocaIP,
"section_loop");
2141 applyStaticWorkshareLoop(Loc.
DL,
LoopInfo, AllocaIP, !IsNowait);
2145 assert(FiniInfo.DK == OMPD_sections &&
2146 "Unexpected finalization stack state!");
2152 AfterIP = {FiniBB, FiniBB->
begin()};
2166 if (IP.getBlock()->end() != IP.getPoint())
2185 Directive OMPD = Directive::OMPD_sections;
2188 return EmitOMPInlinedRegion(OMPD,
nullptr,
nullptr, BodyGenCB, FiniCBWrapper,
2200 std::vector<WeakTrackingVH> &
List) {
2207 for (
unsigned I = 0, E =
List.size();
I != E; ++
I)
2211 if (UsedArray.
empty())
2218 GV->setSection(
"llvm.metadata");
2221Value *OpenMPIRBuilder::getGPUThreadID() {
2224 OMPRTL___kmpc_get_hardware_thread_id_in_block),
2228Value *OpenMPIRBuilder::getGPUWarpSize() {
2233Value *OpenMPIRBuilder::getNVPTXWarpID() {
2238Value *OpenMPIRBuilder::getNVPTXLaneID() {
2240 assert(LaneIDBits < 32 &&
"Invalid LaneIDBits size in NVPTX device.");
2241 unsigned LaneIDMask = ~0
u >> (32u - LaneIDBits);
2246Value *OpenMPIRBuilder::castValueToType(InsertPointTy AllocaIP,
Value *
From,
2251 assert(FromSize > 0 &&
"From size must be greater than zero");
2252 assert(ToSize > 0 &&
"To size must be greater than zero");
2253 if (FromType == ToType)
2255 if (FromSize == ToSize)
2265 CastItem,
FromType->getPointerTo());
2270Value *OpenMPIRBuilder::createRuntimeShuffleFunction(InsertPointTy AllocaIP,
2275 assert(
Size <= 8 &&
"Unsupported bitwidth in shuffle instruction");
2279 Value *ElemCast = castValueToType(AllocaIP, Element, CastTy);
2283 Size <= 4 ? RuntimeFunction::OMPRTL___kmpc_shuffle_int32
2284 : RuntimeFunction::OMPRTL___kmpc_shuffle_int64);
2285 Value *WarpSizeCast =
2287 Value *ShuffleCall =
2289 return castValueToType(AllocaIP, ShuffleCall, CastTy);
2292void OpenMPIRBuilder::shuffleAndStore(InsertPointTy AllocaIP,
Value *SrcAddr,
2308 Value *ElemPtr = DstAddr;
2310 for (
unsigned IntSize = 8; IntSize >= 1; IntSize /= 2) {
2322 if ((
Size / IntSize) > 1) {
2346 Value *Res = createRuntimeShuffleFunction(
2355 Value *LocalElemPtr =
2362 Value *Res = createRuntimeShuffleFunction(
2376void OpenMPIRBuilder::emitReductionListCopy(
2377 InsertPointTy AllocaIP, CopyAction Action,
Type *ReductionArrayTy,
2379 CopyOptionsTy CopyOptions) {
2382 Value *RemoteLaneOffset = CopyOptions.RemoteLaneOffset;
2386 for (
auto En :
enumerate(ReductionInfos)) {
2387 const ReductionInfo &RI = En.value();
2388 Value *SrcElementAddr =
nullptr;
2389 Value *DestElementAddr =
nullptr;
2390 Value *DestElementPtrAddr =
nullptr;
2392 bool ShuffleInElement =
false;
2395 bool UpdateDestListPtr =
false;
2399 ReductionArrayTy, SrcBase,
2400 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2406 ReductionArrayTy, DestBase,
2407 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2413 ".omp.reduction.element");
2416 DestElementAddr = DestAlloca;
2419 DestElementAddr->
getName() +
".ascast");
2421 ShuffleInElement =
true;
2422 UpdateDestListPtr =
true;
2434 if (ShuffleInElement) {
2435 shuffleAndStore(AllocaIP, SrcElementAddr, DestElementAddr, RI.ElementType,
2436 RemoteLaneOffset, ReductionArrayTy);
2438 switch (RI.EvaluationKind) {
2447 RI.ElementType, SrcElementAddr, 0, 0,
".realp");
2449 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2451 RI.ElementType, SrcElementAddr, 0, 1,
".imagp");
2453 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2456 RI.ElementType, DestElementAddr, 0, 0,
".realp");
2458 RI.ElementType, DestElementAddr, 0, 1,
".imagp");
2480 if (UpdateDestListPtr) {
2483 DestElementAddr->
getName() +
".ascast");
2489Function *OpenMPIRBuilder::emitInterWarpCopyFunction(
2499 "_omp_reduction_inter_warp_copy_func", &
M);
2522 "__openmp_nvptx_data_transfer_temporary_storage";
2526 if (!TransferMedium) {
2535 Value *GPUThreadID = getGPUThreadID();
2537 Value *LaneID = getNVPTXLaneID();
2539 Value *WarpID = getNVPTXWarpID();
2548 Arg0Type,
nullptr, ReduceListArg->
getName() +
".addr");
2552 ReduceListAlloca, Arg0Type, ReduceListAlloca->
getName() +
".ascast");
2555 NumWarpsAlloca->
getName() +
".ascast");
2566 for (
auto En :
enumerate(ReductionInfos)) {
2571 const ReductionInfo &RI = En.value();
2573 for (
unsigned TySize = 4; TySize > 0 && RealTySize > 0; TySize /= 2) {
2576 unsigned NumIters = RealTySize / TySize;
2579 Value *Cnt =
nullptr;
2580 Value *CntAddr =
nullptr;
2590 CntAddr->
getName() +
".ascast");
2609 omp::Directive::OMPD_unknown,
2622 auto *RedListArrayTy =
2628 {ConstantInt::get(IndexTy, 0),
2629 ConstantInt::get(IndexTy, En.index())});
2654 omp::Directive::OMPD_unknown,
2663 Value *NumWarpsVal =
2666 Value *IsActiveThread =
2677 Value *TargetElemPtrPtr =
2679 {ConstantInt::get(IndexTy, 0),
2680 ConstantInt::get(IndexTy, En.index())});
2681 Value *TargetElemPtrVal =
2683 Value *TargetElemPtr = TargetElemPtrVal;
2689 Value *SrcMediumValue =
2708 RealTySize %= TySize;
2718Function *OpenMPIRBuilder::emitShuffleAndReduceFunction(
2724 {Builder.getPtrTy(), Builder.getInt16Ty(),
2725 Builder.getInt16Ty(), Builder.getInt16Ty()},
2729 "_omp_reduction_shuffle_and_reduce_func", &
M);
2750 Type *ReduceListArgType = ReduceListArg->
getType();
2754 ReduceListArgType,
nullptr, ReduceListArg->
getName() +
".addr");
2756 LaneIDArg->
getName() +
".addr");
2758 LaneIDArgType,
nullptr, RemoteLaneOffsetArg->
getName() +
".addr");
2760 AlgoVerArg->
getName() +
".addr");
2767 RedListArrayTy,
nullptr,
".omp.reduction.remote_reduce_list");
2770 ReduceListAlloca, ReduceListArgType,
2771 ReduceListAlloca->
getName() +
".ascast");
2773 LaneIdAlloca, LaneIDArgPtrType, LaneIdAlloca->
getName() +
".ascast");
2775 RemoteLaneOffsetAlloca, LaneIDArgPtrType,
2776 RemoteLaneOffsetAlloca->
getName() +
".ascast");
2778 AlgoVerAlloca, LaneIDArgPtrType, AlgoVerAlloca->
getName() +
".ascast");
2781 RemoteReductionListAlloca->
getName() +
".ascast");
2790 Value *RemoteLaneOffset =
2799 emitReductionListCopy(
2801 ReduceList, RemoteListAddrCast, {RemoteLaneOffset,
nullptr,
nullptr});
2832 Value *RemoteOffsetComp =
2849 ->addFnAttr(Attribute::NoUnwind);
2870 ReductionInfos, RemoteListAddrCast, ReduceList);
2883Function *OpenMPIRBuilder::emitListToGlobalCopyFunction(
2890 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
2894 "_omp_reduction_list_to_global_copy_func", &
M);
2911 BufferArg->
getName() +
".addr");
2918 BufferArgAlloca->
getName() +
".ascast");
2923 ReduceListArgAlloca->
getName() +
".ascast");
2929 Value *LocalReduceList =
2931 Value *BufferArgVal =
2936 for (
auto En :
enumerate(ReductionInfos)) {
2937 const ReductionInfo &RI = En.value();
2938 auto *RedListArrayTy =
2942 RedListArrayTy, LocalReduceList,
2943 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
2951 ReductionsBufferTy, BufferVD, 0, En.index());
2953 switch (RI.EvaluationKind) {
2961 RI.ElementType, ElemPtr, 0, 0,
".realp");
2963 RI.ElementType->getStructElementType(0), SrcRealPtr,
".real");
2965 RI.ElementType, ElemPtr, 0, 1,
".imagp");
2967 RI.ElementType->getStructElementType(1), SrcImgPtr,
".imag");
2970 RI.ElementType, GlobVal, 0, 0,
".realp");
2972 RI.ElementType, GlobVal, 0, 1,
".imagp");
2993Function *OpenMPIRBuilder::emitListToGlobalReduceFunction(
3000 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3004 "_omp_reduction_list_to_global_reduce_func", &
M);
3021 BufferArg->
getName() +
".addr");
3026 auto *RedListArrayTy =
3031 Value *LocalReduceList =
3036 BufferArgAlloca->
getName() +
".ascast");
3041 ReduceListArgAlloca->
getName() +
".ascast");
3044 LocalReduceList->
getName() +
".ascast");
3054 for (
auto En :
enumerate(ReductionInfos)) {
3056 RedListArrayTy, LocalReduceListAddrCast,
3057 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3062 ReductionsBufferTy, BufferVD, 0, En.index());
3070 ->addFnAttr(Attribute::NoUnwind);
3076Function *OpenMPIRBuilder::emitGlobalToListCopyFunction(
3083 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3087 "_omp_reduction_global_to_list_copy_func", &
M);
3104 BufferArg->
getName() +
".addr");
3111 BufferArgAlloca->
getName() +
".ascast");
3116 ReduceListArgAlloca->
getName() +
".ascast");
3121 Value *LocalReduceList =
3127 for (
auto En :
enumerate(ReductionInfos)) {
3129 auto *RedListArrayTy =
3133 RedListArrayTy, LocalReduceList,
3134 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3141 ReductionsBufferTy, BufferVD, 0, En.index());
3184Function *OpenMPIRBuilder::emitGlobalToListReduceFunction(
3191 {Builder.getPtrTy(), Builder.getInt32Ty(), Builder.getPtrTy()},
3195 "_omp_reduction_global_to_list_reduce_func", &
M);
3212 BufferArg->
getName() +
".addr");
3222 Value *LocalReduceList =
3227 BufferArgAlloca->
getName() +
".ascast");
3232 ReduceListArgAlloca->
getName() +
".ascast");
3235 LocalReduceList->
getName() +
".ascast");
3245 for (
auto En :
enumerate(ReductionInfos)) {
3247 RedListArrayTy, ReductionList,
3248 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3253 ReductionsBufferTy, BufferVD, 0, En.index());
3261 ->addFnAttr(Attribute::NoUnwind);
3267std::string OpenMPIRBuilder::getReductionFuncName(
StringRef Name)
const {
3268 std::string Suffix =
3270 return (
Name + Suffix).str();
3273Function *OpenMPIRBuilder::createReductionFunction(
3275 ReductionGenCBKind ReductionGenCBKind,
AttributeList FuncAttrs) {
3277 {Builder.getPtrTy(), Builder.getPtrTy()},
3279 std::string
Name = getReductionFuncName(ReducerName);
3291 Value *LHSArrayPtr =
nullptr;
3292 Value *RHSArrayPtr =
nullptr;
3303 LHSAlloca, Arg0Type, LHSAlloca->
getName() +
".ascast");
3305 RHSAlloca, Arg1Type, RHSAlloca->
getName() +
".ascast");
3315 for (
auto En :
enumerate(ReductionInfos)) {
3316 const ReductionInfo &RI = En.value();
3318 RedArrayTy, RHSArrayPtr,
3319 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3322 RHSI8Ptr, RI.PrivateVariable->getType(),
3323 RHSI8Ptr->
getName() +
".ascast");
3326 RedArrayTy, LHSArrayPtr,
3327 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3330 LHSI8Ptr, RI.Variable->getType(), LHSI8Ptr->
getName() +
".ascast");
3341 return ReductionFunc;
3347 for (
auto En :
enumerate(ReductionInfos)) {
3348 unsigned Index = En.index();
3349 const ReductionInfo &RI = En.value();
3350 Value *LHSFixupPtr, *RHSFixupPtr;
3357 LHSPtrs[
Index], [ReductionFunc](
const Use &U) {
3358 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3362 RHSPtrs[
Index], [ReductionFunc](
const Use &U) {
3363 return cast<Instruction>(
U.getUser())->getParent()->getParent() ==
3369 return ReductionFunc;
3377 assert(RI.Variable &&
"expected non-null variable");
3378 assert(RI.PrivateVariable &&
"expected non-null private variable");
3379 assert((RI.ReductionGen || RI.ReductionGenClang) &&
3380 "expected non-null reduction generator callback");
3383 RI.Variable->getType() == RI.PrivateVariable->getType() &&
3384 "expected variables and their private equivalents to have the same "
3387 assert(RI.Variable->getType()->isPointerTy() &&
3388 "expected variables to be pointers");
3395 bool IsNoWait,
bool IsTeamsReduction,
bool HasDistribute,
3397 unsigned ReductionBufNum,
Value *SrcLocInfo) {
3411 if (ReductionInfos.
size() == 0)
3430 if (GridValue.has_value())
3448 Value *ReductionListAlloca =
3451 ReductionListAlloca, PtrTy, ReductionListAlloca->
getName() +
".ascast");
3455 for (
auto En :
enumerate(ReductionInfos)) {
3458 RedArrayTy, ReductionList,
3459 {ConstantInt::get(IndexTy, 0), ConstantInt::get(IndexTy, En.index())});
3466 emitShuffleAndReduceFunction(ReductionInfos, ReductionFunc, FuncAttrs);
3467 Function *WcFunc = emitInterWarpCopyFunction(Loc, ReductionInfos, FuncAttrs);
3472 unsigned MaxDataSize = 0;
3474 for (
auto En :
enumerate(ReductionInfos)) {
3476 if (
Size > MaxDataSize)
3478 ReductionTypeArgs.
emplace_back(En.value().ElementType);
3480 Value *ReductionDataSize =
3482 if (!IsTeamsReduction) {
3483 Value *SarFuncCast =
3487 Value *Args[] = {SrcLocInfo, ReductionDataSize, RL, SarFuncCast,
3490 RuntimeFunction::OMPRTL___kmpc_nvptx_parallel_reduce_nowait_v2);
3495 Ctx, ReductionTypeArgs,
"struct._globalized_locals_ty");
3497 RuntimeFunction::OMPRTL___kmpc_reduction_get_fixed_buffer);
3498 Function *LtGCFunc = emitListToGlobalCopyFunction(
3499 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3500 Function *LtGRFunc = emitListToGlobalReduceFunction(
3501 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3502 Function *GtLCFunc = emitGlobalToListCopyFunction(
3503 ReductionInfos, ReductionsBufferTy, FuncAttrs);
3504 Function *GtLRFunc = emitGlobalToListReduceFunction(
3505 ReductionInfos, ReductionFunc, ReductionsBufferTy, FuncAttrs);
3509 RedFixedBuferFn, {},
"_openmp_teams_reductions_buffer_$_$ptr");
3511 Value *Args3[] = {SrcLocInfo,
3512 KernelTeamsReductionPtr,
3524 RuntimeFunction::OMPRTL___kmpc_nvptx_teams_reduce_nowait_v2);
3541 for (
auto En :
enumerate(ReductionInfos)) {
3548 Value *LHSPtr, *RHSPtr;
3550 &LHSPtr, &RHSPtr, CurFunc));
3555 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3559 return cast<Instruction>(U.getUser())->getParent()->getParent() ==
3563 assert(
false &&
"Unhandled ReductionGenCBKind");
3579 ".omp.reduction.func", &M);
3590 assert(RI.Variable &&
"expected non-null variable");
3591 assert(RI.PrivateVariable &&
"expected non-null private variable");
3592 assert(RI.ReductionGen &&
"expected non-null reduction generator callback");
3593 assert(RI.Variable->getType() == RI.PrivateVariable->getType() &&
3594 "expected variables and their private equivalents to have the same "
3596 assert(RI.Variable->getType()->isPointerTy() &&
3597 "expected variables to be pointers");
3610 unsigned NumReductions = ReductionInfos.
size();
3617 for (
auto En :
enumerate(ReductionInfos)) {
3618 unsigned Index = En.index();
3636 ? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
3641 unsigned RedArrayByteSize =
DL.getTypeStoreSize(RedArrayTy);
3644 Value *Lock = getOMPCriticalRegionLock(
".reduction");
3646 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
3647 : RuntimeFunction::OMPRTL___kmpc_reduce);
3650 {Ident, ThreadId, NumVariables, RedArraySize, RedArray,
3651 ReductionFunc, Lock},
3670 for (
auto En :
enumerate(ReductionInfos)) {
3675 Value *RedValue =
nullptr;
3676 if (!IsByRef[En.index()]) {
3678 "red.value." +
Twine(En.index()));
3680 Value *PrivateRedValue =
3682 "red.private.value." +
Twine(En.index()));
3684 if (IsByRef[En.index()]) {
3686 PrivateRedValue, Reduced));
3689 PrivateRedValue, Reduced));
3694 if (!IsByRef[En.index()])
3698 IsNoWait ? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
3699 : RuntimeFunction::OMPRTL___kmpc_end_reduce);
3707 if (CanGenerateAtomic &&
llvm::none_of(IsByRef, [](
bool P) {
return P; })) {
3728 for (
auto En :
enumerate(ReductionInfos)) {
3731 RedArrayTy, LHSArrayPtr, 0, En.index());
3736 RedArrayTy, RHSArrayPtr, 0, En.index());
3746 if (!IsByRef[En.index()])
3763 Directive OMPD = Directive::OMPD_master;
3768 Value *Args[] = {Ident, ThreadId};
3776 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3787 Directive OMPD = Directive::OMPD_masked;
3793 Value *ArgsEnd[] = {Ident, ThreadId};
3801 return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
3836 IndVarPHI->
addIncoming(ConstantInt::get(IndVarTy, 0), Preheader);
3849 "omp_" +
Name +
".next",
true);
3860 CL->Header = Header;
3879 NextBB, NextBB,
Name);
3903 Value *Start,
Value *Stop,
Value *Step,
bool IsSigned,
bool InclusiveStop,
3913 auto *IndVarTy = cast<IntegerType>(Start->getType());
3914 assert(IndVarTy == Stop->
getType() &&
"Stop type mismatch");
3915 assert(IndVarTy == Step->
getType() &&
"Step type mismatch");
3921 ConstantInt *Zero = ConstantInt::get(IndVarTy, 0);
3949 Value *CountIfLooping;
3950 if (InclusiveStop) {
3960 "omp_" +
Name +
".tripcount");
3981 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u);
3984 M, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u);
3990 InsertPointTy AllocaIP,
3991 bool NeedsBarrier) {
3992 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
3994 "Require dedicated allocate IP");
4006 Type *IVTy =
IV->getType();
4026 Constant *One = ConstantInt::get(IVTy, 1);
4034 Constant *SchedulingType = ConstantInt::get(
4035 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStatic));
4040 {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4041 PUpperBound, PStride, One,
Zero});
4046 CLI->setTripCount(TripCount);
4067 omp::Directive::OMPD_for,
false,
4078 bool NeedsBarrier,
Value *ChunkSize) {
4079 assert(CLI->
isValid() &&
"Requires a valid canonical loop");
4080 assert(ChunkSize &&
"Chunk size is required");
4085 Type *IVTy =
IV->getType();
4087 "Max supported tripcount bitwidth is 64 bits");
4089 :
Type::getInt64Ty(Ctx);
4092 Constant *One = ConstantInt::get(InternalIVTy, 1);
4104 Value *PLowerBound =
4106 Value *PUpperBound =
4115 Value *CastedChunkSize =
4117 Value *CastedTripCount =
4120 Constant *SchedulingType = ConstantInt::get(
4121 I32Type,
static_cast<int>(OMPScheduleType::UnorderedStaticChunked));
4135 SchedulingType, PLastIter,
4136 PLowerBound, PUpperBound,
4141 Value *FirstChunkStart =
4143 Value *FirstChunkStop =
4148 Value *NextChunkStride =
4153 Value *DispatchCounter;
4157 FirstChunkStart, CastedTripCount, NextChunkStride,
4181 Value *IsLastChunk =
4183 Value *CountUntilOrigTripCount =
4186 IsLastChunk, CountUntilOrigTripCount, ChunkRange,
"omp_chunk.tripcount");
4187 Value *BackcastedChunkTC =
4189 CLI->setTripCount(BackcastedChunkTC);
4194 Value *BackcastedDispatchCounter =