44#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
45#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
56#define DEBUG_TYPE "uniformity"
91 using BlockT =
typename ContextT::BlockT;
96 using CycleT =
typename CycleInfoT::CycleT;
101 bool empty()
const {
return Order.empty(); }
102 size_t size()
const {
return Order.size(); }
111 POIndex[&BB] = Order.size();
112 Order.push_back(&BB);
114 <<
"): " << Context.print(&BB) <<
"\n");
115 if (IsReducibleCycleHeader)
116 ReducibleCycleHeaders.insert(&BB);
120 assert(POIndex.count(BB));
121 return POIndex.lookup(BB);
125 return ReducibleCycleHeaders.contains(BB);
132 const ContextT &Context;
142template <
typename>
class DivergencePropagator;
264 using BlockT =
typename ContextT::BlockT;
271 using CycleT =
typename CycleInfoT::CycleT;
321 CachedControlDivDescs;
332 using BlockT =
typename ContextT::BlockT;
336 using UseT =
typename ContextT::UseT;
341 using CycleT =
typename CycleInfoT::CycleT;
345 typename SyncDependenceAnalysisT::DivergenceDescriptor;
349 std::tuple<ConstValueRefT, InstructionT *, const CycleT *>;
386 assert(
I.isTerminator() &&
"Expected a terminator instruction!");
401 if (ContextT::isAlwaysUniform(V))
479 void taintAndPushAllDefs(
const BlockT &JoinBlock);
483 void taintAndPushPhiNodes(
const BlockT &JoinBlock);
488 void propagateCycleExitDivergence(
const BlockT &DivExit,
492 void analyzeCycleExitDivergence(
const CycleT &DefCycle);
505 bool isTemporalDivergent(
const BlockT &ObservingBlock,
509template <
typename ImplT>
517 using BlockT =
typename ContextT::BlockT;
523 using CycleT =
typename CycleInfoT::CycleT;
528 typename SyncDependenceAnalysisT::DivergenceDescriptor;
544 std::unique_ptr<DivergenceDescriptorT>
DivDesc;
554 Out <<
"Propagator::BlockLabels {\n";
555 for (
int BlockIdx = (
int)
CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
558 Out <<
Context.print(
Block) <<
"(" << BlockIdx <<
") : ";
562 Out <<
Context.print(Label) <<
"\n";
574 <<
"\tpushed label: " <<
Context.print(&PushedLabel)
576 <<
"\told label: " <<
Context.print(OldLabel) <<
"\n");
579 if (OldLabel == &PushedLabel)
582 if (OldLabel != &SuccBlock) {
583 auto SuccIdx =
CyclePOT.getIndex(&SuccBlock);
612 DivDesc->CycleDivBlocks.insert(&ExitBlock);
624 DivDesc->JoinDivBlocks.insert(&SuccBlock);
645 if (
C->isReducible())
647 while (
const CycleT *
P =
C->getParentCycle()) {
648 if (
P->isReducible())
659 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
663 DivDesc->CycleDivBlocks.insert(SuccBlock);
665 <<
Context.print(SuccBlock) <<
"\n");
684 (!IrreducibleAncestor || !IrreducibleAncestor->contains(
Block)))
690 if (BlockIdx == DivTermIdx) {
696 << BlockIdx <<
"\n");
718 const auto *BlockCycle =
CI.getCycle(
Block);
721 BlockCycle->getExitBlocks(BlockCycleExits);
722 bool BranchIsInside = BlockCycle->contains(&
DivTermBlock);
723 for (
auto *BlockCycleExit : BlockCycleExits) {
742 if (
Cycle->isReducible()) {
748 Cycle->getExitBlocks(Exits);
749 auto *Header =
Cycle->getHeader();
751 for (
const auto *Exit : Exits) {
754 DivDesc->CycleDivBlocks.insert(Exit);
765template <
typename ContextT>
768 : CyclePO(Context), DT(DT), CI(CI) {
772template <
typename ContextT>
777 return EmptyDivergenceDesc;
781 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
782 if (ItCached != CachedControlDivDescs.end())
783 return *ItCached->second;
793 for (
const auto *BB : Blocks) {
794 Out << LS << CI.getSSAContext().print(BB);
801 dbgs() <<
"\nResult (" << CI.getSSAContext().print(DivTermBlock)
802 <<
"):\n JoinDivBlocks: " << PrintBlockSet(DivDesc->JoinDivBlocks)
803 <<
" CycleDivBlocks: " << PrintBlockSet(DivDesc->CycleDivBlocks)
808 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
809 assert(ItInserted.second);
810 return *ItInserted.first->second;
813template <
typename ContextT>
826 if (
I.isTerminator()) {
830 <<
Context.print(
I.getParent()) <<
"\n");
840template <
typename ContextT>
850template <
typename ContextT>
853 UniformOverrides.insert(&Instr);
856template <
typename ContextT>
875template <
typename ContextT>
876void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
877 const CycleT &DefCycle) {
879 DefCycle.getExitBlocks(Exits);
880 for (
auto *Exit : Exits) {
881 for (
auto &Phi : Exit->phis()) {
882 if (usesValueFromCycle(Phi, DefCycle)) {
888 for (
auto *BB : DefCycle.blocks()) {
890 [&](BlockT *Exit) {
return DT.dominates(BB, Exit); }))
892 for (
auto &
II : *BB) {
893 propagateTemporalDivergence(
II, DefCycle);
898template <
typename ContextT>
899void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
900 const BlockT &DivExit,
const CycleT &InnerDivCycle) {
903 auto *DivCycle = &InnerDivCycle;
904 auto *OuterDivCycle = DivCycle;
905 auto *ExitLevelCycle = CI.getCycle(&DivExit);
906 const unsigned CycleExitDepth =
907 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
910 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
912 <<
Context.print(DivCycle->getHeader()) <<
"\n");
913 OuterDivCycle = DivCycle;
914 DivCycle = DivCycle->getParentCycle();
917 <<
Context.print(OuterDivCycle->getHeader()) <<
"\n");
919 if (!DivergentExitCycles.insert(OuterDivCycle).second)
924 for (
const auto *
C : AssumedDivergent) {
925 if (
C->contains(OuterDivCycle))
929 analyzeCycleExitDivergence(*OuterDivCycle);
932template <
typename ContextT>
933void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
936 for (
const auto &
I :
instrs(BB)) {
940 if (
I.isTerminator())
948template <
typename ContextT>
949void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
950 const BlockT &JoinBlock) {
953 for (
const auto &Phi : JoinBlock.phis()) {
961 if (ContextT::isConstantOrUndefValuePhi(Phi))
970template <
typename CycleT>
974 [Candidate](CycleT *
C) {
return C->contains(Candidate); }))
976 Cycles.push_back(Candidate);
985template <
typename CycleT,
typename BlockT>
987 const BlockT *DivTermBlock,
988 const BlockT *JoinBlock) {
992 if (
Cycle->contains(DivTermBlock))
995 const auto *OriginalCycle =
Cycle;
996 const auto *Parent =
Cycle->getParentCycle();
997 while (Parent && !Parent->contains(DivTermBlock)) {
999 Parent =
Cycle->getParentCycle();
1005 (void)OriginalCycle;
1008 if (
Cycle->isReducible()) {
1013 LLVM_DEBUG(
dbgs() <<
"cycle made divergent by external branch\n");
1021template <
typename ContextT,
typename CycleT,
typename BlockT,
1022 typename DominatorTreeT>
1023static const CycleT *
1025 const BlockT *JoinBlock,
const DominatorTreeT &DT,
1026 ContextT &Context) {
1028 <<
" for internal branch " << Context.print(DivTermBlock)
1030 if (DT.properlyDominates(DivTermBlock, JoinBlock))
1035 while (
Cycle && !
Cycle->contains(DivTermBlock)) {
1041 if (DT.properlyDominates(
Cycle->getHeader(), JoinBlock))
1045 <<
" does not dominate join\n");
1047 const auto *Parent =
Cycle->getParentCycle();
1048 while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1049 LLVM_DEBUG(
dbgs() <<
" header " << Context.print(Parent->getHeader())
1050 <<
" does not dominate join\n");
1052 Parent = Parent->getParentCycle();
1055 LLVM_DEBUG(
dbgs() <<
" cycle made divergent by internal branch\n");
1059template <
typename ContextT,
typename CycleT,
typename BlockT,
1060 typename DominatorTreeT>
1061static const CycleT *
1063 const BlockT *JoinBlock,
const DominatorTreeT &DT,
1064 ContextT &Context) {
1080template <
typename ContextT>
1081bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1082 const BlockT &ObservingBlock,
const InstructionT &Def)
const {
1083 const BlockT *DefBlock = Def.getParent();
1084 for (
const CycleT *
Cycle = CI.getCycle(DefBlock);
1087 if (DivergentExitCycles.contains(
Cycle)) {
1094template <
typename ContextT>
1097 const auto *DivTermBlock = Term.getParent();
1103 if (!DT.isReachableFromEntry(DivTermBlock))
1106 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1110 for (
const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1111 const auto *
Cycle =
CI.getCycle(JoinBlock);
1120 taintAndPushPhiNodes(*JoinBlock);
1126 return A->getDepth() >
B->getDepth();
1134 for (
auto *
C : DivCycles) {
1138 for (
const BlockT *BB :
C->blocks()) {
1139 taintAndPushAllDefs(*BB);
1143 const auto *BranchCycle =
CI.getCycle(DivTermBlock);
1144 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1145 for (
const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1146 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1150template <
typename ContextT>
1162 if (
I->isTerminator()) {
1173template <
typename ContextT>
1180template <
typename ContextT>
1183 return UniformOverrides.contains(&Instr);
1186template <
typename ContextT>
1192template <
typename ContextT>
1196 DA.reset(
new ImplT{DT, CI,
TTI});
1199template <
typename ContextT>
1204 constexpr bool IsMIR = std::is_same<InstructionT, MachineInstr>::value;
1205 std::string NewLine = IsMIR ?
"" :
"\n";
1207 bool FoundDivergence =
false;
1211 if (!AssumedDivergent.empty()) {
1212 FoundDivergence =
true;
1213 OS <<
"CYCLES ASSUMED DIVERGENT:\n";
1219 if (!DivergentExitCycles.empty()) {
1220 FoundDivergence =
true;
1221 OS <<
"CYCLES WITH DIVERGENT EXIT:\n";
1228 FoundDivergence =
true;
1229 OS <<
"\nTEMPORAL DIVERGENCE LIST:\n";
1232 OS <<
"Value :" <<
Context.print(Val) << NewLine
1233 <<
"Used by :" <<
Context.print(UseInst) << NewLine
1234 <<
"Outside cycle :" <<
Cycle->print(
Context) <<
"\n\n";
1241 OS <<
"DEFINITIONS\n";
1244 for (
auto Value : Defs) {
1246 FoundDivergence =
true;
1247 OS <<
" DIVERGENT: ";
1254 OS <<
"TERMINATORS\n";
1258 if (DivergentTerminators)
1259 FoundDivergence =
true;
1260 for (
auto *
T : Terms) {
1261 if (DivergentTerminators)
1262 OS <<
" DIVERGENT: ";
1268 OS <<
"END BLOCK\n";
1271 if (!FoundDivergence)
1272 OS <<
"ALL VALUES UNIFORM\n";
1275template <
typename ContextT>
1279 return make_range(DA->TemporalDivergenceList.begin(),
1280 DA->TemporalDivergenceList.end());
1283template <
typename ContextT>
1284const typename ContextT::FunctionT &
1286 return DA->getFunction();
1292template <
typename ContextT>
1294 return DA && DA->isDivergent(V);
1297template <
typename ContextT>
1300 assert(
I->isTerminator() &&
"Expected a terminator instruction!");
1301 return DA && DA->isDivergentTerminator(*
I);
1304template <
typename ContextT>
1306 return DA && DA->isDivergentUse(U);
1309template <
typename ContextT>
1311 return DA && DA->hasDivergentTerminator(
B);
1315template <
typename ContextT>
1318 Out <<
" Uniformity analysis not computed (no branch divergence).\n";
1324template <
typename ContextT>
1325void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
1329 while (!Stack.empty()) {
1330 auto *NextBB = Stack.back();
1331 if (Finalized.
count(NextBB)) {
1335 LLVM_DEBUG(
dbgs() <<
" visiting " << CI.getSSAContext().print(NextBB)
1337 auto *NestedCycle = CI.getCycle(NextBB);
1340 while (NestedCycle->getParentCycle() !=
Cycle)
1341 NestedCycle = NestedCycle->getParentCycle();
1344 NestedCycle->getExitBlocks(NestedExits);
1345 bool PushedNodes =
false;
1346 for (
auto *NestedExitBB : NestedExits) {
1348 << CI.getSSAContext().print(NestedExitBB) <<
"\n");
1351 if (Finalized.
count(NestedExitBB))
1354 Stack.push_back(NestedExitBB);
1356 << CI.getSSAContext().print(NestedExitBB) <<
"\n");
1361 computeCyclePO(CI, NestedCycle, Finalized);
1368 bool PushedNodes =
false;
1371 << CI.getSSAContext().print(SuccBB) <<
"\n");
1374 if (Finalized.
count(SuccBB))
1377 Stack.push_back(SuccBB);
1378 LLVM_DEBUG(
dbgs() <<
" pushed succ: " << CI.getSSAContext().print(SuccBB)
1384 << CI.getSSAContext().print(NextBB) <<
"\n");
1386 Finalized.
insert(NextBB);
1387 appendBlock(*NextBB);
1393template <
typename ContextT>
1394void ModifiedPostOrder<ContextT>::computeCyclePO(
1395 const CycleInfoT &CI,
const CycleT *
Cycle,
1399 auto *CycleHeader =
Cycle->getHeader();
1402 << CI.getSSAContext().print(CycleHeader) <<
"\n");
1403 assert(!Finalized.count(CycleHeader));
1404 Finalized.insert(CycleHeader);
1408 << CI.getSSAContext().print(CycleHeader) <<
"\n");
1409 appendBlock(*CycleHeader,
Cycle->isReducible());
1413 LLVM_DEBUG(
dbgs() <<
" examine succ: " << CI.getSSAContext().print(BB)
1415 if (!
Cycle->contains(BB))
1417 if (BB == CycleHeader)
1419 if (!Finalized.count(BB)) {
1420 LLVM_DEBUG(
dbgs() <<
" pushed succ: " << CI.getSSAContext().print(BB)
1422 Stack.push_back(BB);
1427 computeStackPO(Stack, CI,
Cycle, Finalized);
1433template <
typename ContextT>
1437 auto *
F = CI.getFunction();
1439 Stack.push_back(&
F->front());
1440 computeStackPO(Stack, CI,
nullptr, Finalized);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseSet and SmallDenseSet classes.
uint64_t IntrinsicInst * II
This file defines the SmallPtrSet class.
This file defines the SparseBitVector class.
Implements a dense probed hash-table based set.
Compute divergence starting with a divergent branch.
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
const ModifiedPO & CyclePOT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
typename ContextT::DominatorTreeT DominatorTreeT
bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel)
const BlockT & DivTermBlock
std::unique_ptr< DivergenceDescriptorT > DivDesc
void printDefs(raw_ostream &Out)
typename ContextT::FunctionT FunctionT
GenericCycleInfo< ContextT > CycleInfoT
const DominatorTreeT & DT
ModifiedPostOrder< ContextT > ModifiedPO
std::unique_ptr< DivergenceDescriptorT > computeJoinPoints()
BlockLabelMapT & BlockLabels
SparseBitVector FreshLabels
bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label)
typename ContextT::ValueRefT ValueRefT
typename ContextT::BlockT BlockT
typename CycleInfoT::CycleT CycleT
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
bool visitEdge(const BlockT &SuccBlock, const BlockT &Label)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
Cycle information for a function.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Locate join blocks for disjoint paths starting at a divergent branch.
GenericSyncDependenceAnalysis(const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
ModifiedPostOrder< ContextT > ModifiedPO
DivergencePropagator< ContextT > DivergencePropagatorT
DenseMap< const BlockT *, const BlockT * > BlockLabelMap
SmallPtrSet< const BlockT *, 4 > ConstBlockSet
typename ContextT::DominatorTreeT DominatorTreeT
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::FunctionT FunctionT
typename ContextT::InstructionT InstructionT
typename ContextT::BlockT BlockT
typename ContextT::ValueRefT ValueRefT
typename CycleInfoT::CycleT CycleT
const DivergenceDescriptor & getJoinBlocks(const BlockT *DivTermBlock)
Computes divergent join points and cycle exits caused by branch divergence in Term.
A helper class to return the specified delimiter string after the first invocation of operator String...
Construct a specially modified post-order traversal of cycles.
typename ContextT::FunctionT FunctionT
typename CycleInfoT::CycleT CycleT
bool isReducibleCycleHeader(const BlockT *BB) const
void appendBlock(const BlockT &BB, bool IsReducibleCycleHeader=false)
const BlockT * operator[](size_t Idx) const
ModifiedPostOrder(const ContextT &C)
unsigned count(BlockT *BB) const
void compute(const CycleInfoT &CI)
Generically compute the modified post order.
GenericCycleInfo< ContextT > CycleInfoT
unsigned getIndex(const BlockT *BB) const
typename std::vector< BlockT * >::const_iterator const_iterator
typename ContextT::DominatorTreeT DominatorTreeT
typename ContextT::BlockT BlockT
Simple wrapper around std::function<void(raw_ostream&)>.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLVM Value Representation.
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
@ C
The default llvm calling convention, compatible with C.
This is an optimization pass for GlobalISel generic memory operations.
static const CycleT * getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Return the outermost cycle made divergent by branch inside it.
static const CycleT * getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock)
Return the outermost cycle made divergent by branch outside it.
auto successors(const MachineBasicBlock *BB)
static bool insertIfNotContained(SmallVector< CycleT * > &Cycles, CycleT *Candidate)
Add Candidate to Cycles if it is not already contained in Cycles.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instrs(const MachineBasicBlock &BB)
static const CycleT * getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Information discovered by the sync dependence analysis for each divergent branch.
ConstBlockSet CycleDivBlocks
ConstBlockSet JoinDivBlocks
BlockLabelMap BlockLabels