1//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This template implementation resides in a separate file so that it
10// does not get injected into every .cpp file that includes the
11// generic header.
12//
13// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
14//
15// This file should only be included by files that implement a
16// specialization of the relevant templates. Currently these are:
17// - UniformityAnalysis.cpp
18//
19// Note: The DEBUG_TYPE macro should be defined before using this
20// file so that any use of LLVM_DEBUG is associated with the
21// including file rather than this file.
22//
23//===----------------------------------------------------------------------===//
24///
25/// \file
26/// \brief Implementation of uniformity analysis.
27///
28/// The algorithm is a fixed point iteration that starts with the assumption
29/// that all control flow and all values are uniform. Starting from sources of
30/// divergence (whose discovery must be implemented by a CFG- or even
31/// target-specific derived class), divergence of values is propagated from
32/// definition to uses in a straight-forward way. The main complexity lies in
33/// the propagation of the impact of divergent control flow on the divergence of
34/// values (sync dependencies).
35///
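/// For illustration (a sketch; the names %tid, %x and %y are invented for
/// this note): given a divergent source %tid and the instructions
///   %x = add i32 %tid, 1
///   %y = mul i32 %x, 2
/// the iteration first marks %x divergent because it uses %tid, then %y
/// because it uses %x, and stops once no divergence state changes.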
36//===----------------------------------------------------------------------===//
37
38#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
39#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
40
41#include "llvm/ADT/GenericUniformityInfo.h"
42#include "llvm/ADT/SmallPtrSet.h"
43#include "llvm/ADT/SparseBitVector.h"
44#include "llvm/ADT/StringExtras.h"
45#include "llvm/Support/raw_ostream.h"
48#include <set>
49
50#define DEBUG_TYPE "uniformity"
51
52using namespace llvm;
53
54namespace llvm {
56template <typename Range> auto unique(Range &&R) {
57 return std::unique(adl_begin(R), adl_end(R));
58}
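// Usage sketch (illustrative, with a hypothetical SmallVector<int> Vec):
//   llvm::sort(Vec);
//   Vec.erase(unique(Vec), Vec.end()); // drop adjacent duplicates
// This mirrors the sort + std::unique de-duplication applied to the frontier
// vector in analyzeCycleExitDivergence() further below.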
59
60/// Construct a specially modified post-order traversal of cycles.
61///
62/// The ModifiedPO is constructed using a virtually modified CFG as follows:
63///
64/// 1. The successors of pre-entry nodes (predecessors of a cycle
65/// entry that are outside the cycle) are replaced by the
66/// successors of the successors of the header.
67/// 2. Successors of the cycle header are replaced by the exit blocks
68/// of the cycle.
69///
70/// Effectively, we produce a depth-first numbering with the following
71/// properties:
72///
73/// 1. Nodes after a cycle are numbered earlier than the cycle header.
74/// 2. The header is numbered earlier than the nodes in the cycle.
75/// 3. The numbering of the nodes within the cycle forms an interval
76/// starting with the header.
77///
78/// Effectively, the virtual modification arranges the nodes in a
79/// cycle as a DAG with the header as the sole leaf, and successors of
80/// the header as the roots. A reverse traversal of this numbering has
81/// the following invariant on the unmodified original CFG:
82///
83/// Each node is visited after all its predecessors, except if that
84/// predecessor is the cycle header.
85///
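/// Example (an illustrative sketch): for a CFG
///   E -> H,  H -> B,  B -> {H, X},  X -> End
/// with the reducible cycle {H, B}, the computed indices are
///   End:0, X:1, H:2, B:3, E:4.
/// The blocks after the cycle (X, End) are numbered before the header H,
/// the header precedes the remaining cycle blocks, and the cycle occupies
/// the interval [2,3] starting at H.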
86template <typename ContextT> class ModifiedPostOrder {
87public:
88 using BlockT = typename ContextT::BlockT;
89 using FunctionT = typename ContextT::FunctionT;
90 using DominatorTreeT = typename ContextT::DominatorTreeT;
91
92 using CycleInfoT = GenericCycleInfo<ContextT>;
93 using CycleT = typename CycleInfoT::CycleT;
94 using const_iterator = typename std::vector<BlockT *>::const_iterator;
95
96 ModifiedPostOrder(const ContextT &C) : Context(C) {}
97
98 bool empty() const { return m_order.empty(); }
99 size_t size() const { return m_order.size(); }
100
101 void clear() { m_order.clear(); }
102 void compute(const CycleInfoT &CI);
103
104 unsigned count(BlockT *BB) const { return POIndex.count(BB); }
105 const BlockT *operator[](size_t idx) const { return m_order[idx]; }
106
107 void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
108 POIndex[&BB] = m_order.size();
109 m_order.push_back(&BB);
110 LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
111 << "): " << Context.print(&BB) << "\n");
112 if (isReducibleCycleHeader)
113 ReducibleCycleHeaders.insert(&BB);
114 }
115
116 unsigned getIndex(const BlockT *BB) const {
117 assert(POIndex.count(BB));
118 return POIndex.lookup(BB);
119 }
120
121 bool isReducibleCycleHeader(const BlockT *BB) const {
122 return ReducibleCycleHeaders.contains(BB);
123 }
124
125private:
126 std::vector<const BlockT *> m_order;
127 DenseMap<const BlockT *, unsigned> POIndex;
128 SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
129 const ContextT &Context;
130
131 void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
132 SmallPtrSetImpl<BlockT *> &Finalized);
133
134 void computeStackPO(SmallVectorImpl<BlockT *> &Stack, const CycleInfoT &CI,
135 const CycleT *Cycle,
136 SmallPtrSetImpl<BlockT *> &Finalized);
137};
138
139template <typename> class DivergencePropagator;
140
141/// \class GenericSyncDependenceAnalysis
142///
143/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
144///
145/// An analysis per divergent branch that returns the set of basic
146/// blocks whose phi nodes become divergent due to divergent control.
147/// These are the blocks that are reachable by two disjoint paths from
148/// the branch, or cycle exits reachable along a path that is disjoint
149/// from a path to the cycle latch.
150
151// --- Above line is not a doxygen comment; intentionally left blank ---
152//
153// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
154//
155// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
156// control-induced divergence in phi nodes.
157//
158// -- Reference --
159// The algorithm is an extension of Section 5 of
160//
161// An abstract interpretation for SPMD divergence
162// on reducible control flow graphs.
163// Julian Rosemann, Simon Moll and Sebastian Hack
164// POPL '21
165//
166//
167// -- Sync dependence --
168// Sync dependence characterizes the control flow aspect of the
169// propagation of branch divergence. For example,
170//
171// %cond = icmp slt i32 %tid, 10
172// br i1 %cond, label %then, label %else
173// then:
174// br label %merge
175// else:
176// br label %merge
177// merge:
178// %a = phi i32 [ 0, %then ], [ 1, %else ]
179//
180// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
181// because %tid is not on its use-def chains, %a is sync dependent on %tid
182// because the branch "br i1 %cond" depends on %tid and affects which value %a
183// is assigned to.
184//
185//
186// -- Reduction to SSA construction --
187// There are two disjoint paths from A to X, if a certain variant of SSA
188// construction places a phi node in X under the following set-up scheme.
189//
190// This variant of SSA construction ignores incoming undef values.
191// That is, paths from the entry without a definition do not result in
192// phi nodes.
193//
194// entry
195// / \
196// A \
197// / \ Y
198// B C /
199// \ / \ /
200// D E
201// \ /
202// F
203//
204// Assume that A contains a divergent branch. We are interested
205// in the set of all blocks where each block is reachable from A
206// via two disjoint paths. This would be the set {D, F} in this
207// case.
208// To generally reduce this query to SSA construction we introduce
209// a virtual variable x and assign to x different values in each
210// successor block of A.
211//
212// entry
213// / \
214// A \
215// / \ Y
216// x = 0 x = 1 /
217// \ / \ /
218// D E
219// \ /
220// F
221//
222// Our flavor of SSA construction for x will construct the following
223//
224// entry
225// / \
226// A \
227// / \ Y
228// x0 = 0 x1 = 1 /
229// \ / \ /
230// x2 = phi E
231// \ /
232// x3 = phi
233//
234// The blocks D and F contain phi nodes and are thus each reachable
235// by two disjoint paths from A.
236//
237// -- Remarks --
238// * In case of cycle exits we need to check for temporal divergence.
239// To this end, we check whether the definition of x differs between the
240// cycle exit and the cycle header (_after_ SSA construction); an example
//   follows these remarks.
241//
242// * In the presence of irreducible control flow, the fixed point is
243// reached only after multiple iterations. This is because labels
244// reaching the header of a cycle must be repropagated through the
245// cycle. This is true even in a reducible cycle, since the labels
246// may have been produced by a nested irreducible cycle.
247//
248// * Note that SyncDependenceAnalysis is not concerned with the points
249// of convergence in an irreducible cycle. Its only purpose is to
250// identify join blocks. The "diverged entry" criterion is
251// separately applied on join blocks to determine if an entire
252// irreducible cycle is assumed to be divergent.
253//
254// * Relevant related work:
255// A simple algorithm for global data flow analysis problems.
256// Matthew S. Hecht and Jeffrey D. Ullman.
257// SIAM Journal on Computing, 4(4):519–532, December 1975.
258//
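// -- Example: temporal divergence at a cycle exit (illustrative sketch) --
// Suppose a loop has a divergent exit condition, so threads leave it after
// different numbers of iterations, and consider a counter such as
//   %i = phi i32 [ 0, %preheader ], [ %i.next, %latch ]
// Within any single iteration %i is uniform, but the value observed after
// the loop differs per thread, because each thread reads it at a different
// "time" (iteration). The exit block is therefore recorded as a divergent
// cycle exit, and uses of %i outside the loop are marked divergent.
//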
259template <typename ContextT> class GenericSyncDependenceAnalysis {
260public:
261 using BlockT = typename ContextT::BlockT;
262 using DominatorTreeT = typename ContextT::DominatorTreeT;
263 using FunctionT = typename ContextT::FunctionT;
264 using ValueRefT = typename ContextT::ValueRefT;
265 using InstructionT = typename ContextT::InstructionT;
266
267 using CycleInfoT = GenericCycleInfo<ContextT>;
268 using CycleT = typename CycleInfoT::CycleT;
269
270 using ConstBlockSet = SmallPtrSet<const BlockT *, 4>;
271 using ModifiedPO = ModifiedPostOrder<ContextT>;
272
273 // * if BlockLabels[B] == C then C is the dominating definition at
274 // block B
275 // * if BlockLabels[B] == nullptr then we haven't seen B yet
276 // * if BlockLabels[B] == B then:
277 // - B is a join point of disjoint paths from X, or,
278 // - B is an immediate successor of X (initial value), or,
279 // - B is X
280 using BlockLabelMap = DenseMap<const BlockT *, const BlockT *>;
281
282 /// Information discovered by the sync dependence analysis for each
283 /// divergent branch.
284 struct DivergenceDescriptor {
285 // Join points of diverged paths.
286 ConstBlockSet JoinDivBlocks;
287 // Divergent cycle exits
288 ConstBlockSet CycleDivBlocks;
289 // Labels assigned to blocks on diverged paths.
290 BlockLabelMap BlockLabels;
291 };
292
293 using DivergencePropagatorT = DivergencePropagator<ContextT>;
294
295 GenericSyncDependenceAnalysis(const ContextT &Context,
296 const DominatorTreeT &DT, const CycleInfoT &CI);
297
298 /// \brief Computes divergent join points and cycle exits caused by branch
299 /// divergence in \p Term.
300 ///
301 /// This returns a pair of sets:
302 /// * The set of blocks which are reachable by disjoint paths from
303 /// \p Term.
304 /// * The set also contains cycle exits if there are two disjoint paths:
305 /// one from \p Term to the cycle exit and another from \p Term to
306 /// the cycle header.
307 const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
308
309private:
310 static DivergenceDescriptor EmptyDivergenceDesc;
311
312 ModifiedPO CyclePO;
313
314 const DominatorTreeT &DT;
315 const CycleInfoT &CI;
316
317 DenseMap<const BlockT *, std::unique_ptr<DivergenceDescriptor>>
318 CachedControlDivDescs;
319};
320
321/// \brief Analysis that identifies uniform values in a data-parallel
322/// execution.
323///
324/// This analysis propagates divergence in a data-parallel context
325/// from sources of divergence to all users. It can be instantiated
326/// for an IR that provides a suitable SSAContext.
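///
/// A rough usage sketch (the concrete LLVM IR instantiation lives in
/// UniformityAnalysis.cpp; the names below are illustrative):
///   GenericUniformityAnalysisImpl<SSAContext> Impl(F, DT, CI, TTI);
///   Impl.initialize(); // specialization-specific: seed divergence sources
///   Impl.compute();    // fixed-point propagation
///   if (Impl.isDivergent(SomeInst)) { ... }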
327template <typename ContextT> class GenericUniformityAnalysisImpl {
328public:
329 using BlockT = typename ContextT::BlockT;
330 using FunctionT = typename ContextT::FunctionT;
331 using ValueRefT = typename ContextT::ValueRefT;
332 using ConstValueRefT = typename ContextT::ConstValueRefT;
333 using UseT = typename ContextT::UseT;
334 using InstructionT = typename ContextT::InstructionT;
335 using DominatorTreeT = typename ContextT::DominatorTreeT;
336
337 using CycleInfoT = GenericCycleInfo<ContextT>;
338 using CycleT = typename CycleInfoT::CycleT;
339
340 using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
341 using DivergenceDescriptorT =
342 typename SyncDependenceAnalysisT::DivergenceDescriptor;
343 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
344
345 GenericUniformityAnalysisImpl(const FunctionT &F, const DominatorTreeT &DT,
346 const CycleInfoT &CI,
347 const TargetTransformInfo *TTI)
348 : Context(CI.getSSAContext()), F(F), CI(CI), TTI(TTI), DT(DT),
349 SDA(Context, DT, CI) {}
350
351 void initialize();
352
353 const FunctionT &getFunction() const { return F; }
354
355 /// \brief Mark \p UniVal as a value that is always uniform.
356 void addUniformOverride(const InstructionT &Instr);
357
358 /// \brief Mark \p DivVal as a value that is always divergent.
359 /// \returns Whether the tracked divergence state of \p DivVal changed.
360 bool markDivergent(const InstructionT &I);
361 bool markDivergent(ConstValueRefT DivVal);
362 bool markDefsDivergent(const InstructionT &Instr,
363 bool AllDefsDivergent = true);
364
365 /// \brief Propagate divergence to all instructions in the region.
366 /// Divergence is seeded by calls to \p markDivergent.
367 void compute();
368
369 /// \brief Whether any value was marked or analyzed to be divergent.
370 bool hasDivergence() const { return !DivergentValues.empty(); }
371
372 /// \brief Whether \p Val will always return a uniform value regardless of its
373 /// operands
374 bool isAlwaysUniform(const InstructionT &Instr) const;
375
376 bool hasDivergentDefs(const InstructionT &I) const;
377
378 bool isDivergent(const InstructionT &I) const {
379 if (I.isTerminator()) {
380 return DivergentTermBlocks.contains(I.getParent());
381 }
382 return hasDivergentDefs(I);
383 };
384
385 /// \brief Whether \p Val is divergent at its definition.
386 bool isDivergent(ConstValueRefT V) const { return DivergentValues.count(V); }
387
388 bool isDivergentUse(const UseT &U) const;
389
390 bool hasDivergentTerminator(const BlockT &B) const {
391 return DivergentTermBlocks.contains(&B);
392 }
393
394 void print(raw_ostream &out) const;
395
396protected:
397 /// \brief Value/block pair representing a single phi input.
398 struct PhiInput {
399 ConstValueRefT value;
400 BlockT *predBlock;
401
402 PhiInput(ConstValueRefT value, BlockT *predBlock)
403 : value(value), predBlock(predBlock) {}
404 };
405
406 const ContextT &Context;
407 const FunctionT &F;
408 const CycleInfoT &CI;
409 const TargetTransformInfo *TTI = nullptr;
410
411 // Detected/marked divergent values.
412 std::set<ConstValueRefT> DivergentValues;
413 SmallPtrSet<const BlockT *, 32> DivergentTermBlocks;
414
415 // Internal worklist for divergence propagation.
416 std::vector<const InstructionT *> Worklist;
417
418 /// \brief Mark \p Term as divergent and push all Instructions that become
419 /// divergent as a result on the worklist.
420 void analyzeControlDivergence(const InstructionT &Term);
421
422private:
423 const DominatorTreeT &DT;
424
425 // Recognized cycles with divergent exits.
426 SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
427
428 // Cycles assumed to be divergent.
429 //
430 // We don't use a set here because every insertion needs an explicit
431 // traversal of all existing members.
432 SmallVector<const CycleT *> AssumedDivergent;
433
434 // The SDA links divergent branches to divergent control-flow joins.
435 SyncDependenceAnalysisT SDA;
436
437 // Set of known-uniform values.
438 SmallPtrSet<const InstructionT *, 32> UniformOverrides;
439
440 /// \brief Mark all nodes in \p JoinBlock as divergent and push them on
441 /// the worklist.
442 void taintAndPushAllDefs(const BlockT &JoinBlock);
443
444 /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
445 /// the worklist.
446 void taintAndPushPhiNodes(const BlockT &JoinBlock);
447
448 /// \brief Identify all Instructions that become divergent because \p DivExit
449 /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
450 /// divergent and push them on the worklist.
451 void propagateCycleExitDivergence(const BlockT &DivExit,
452 const CycleT &DivCycle);
453
454 /// \brief Internal implementation function for propagateCycleExitDivergence.
455 void analyzeCycleExitDivergence(const CycleT &OuterDivCycle);
456
457 /// \brief Mark all instructions as divergent that use a value defined in \p
458 /// OuterDivCycle. Push their users on the worklist.
459 void analyzeTemporalDivergence(const InstructionT &I,
460 const CycleT &OuterDivCycle);
461
462 /// \brief Push all users of \p Val (in the region) to the worklist.
463 void pushUsers(const InstructionT &I);
464 void pushUsers(ConstValueRefT V);
465
466 bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
467
468 /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
469 bool isTemporalDivergent(const BlockT &ObservingBlock,
470 const InstructionT &Def) const;
471};
472
473template <typename ImplT>
474void GenericUniformityAnalysisImplDeleter<ImplT>::operator()(ImplT *Impl) {
475 delete Impl;
476}
477
478/// Compute divergence starting with a divergent branch.
479template <typename ContextT> class DivergencePropagator {
480public:
481 using BlockT = typename ContextT::BlockT;
482 using DominatorTreeT = typename ContextT::DominatorTreeT;
483 using FunctionT = typename ContextT::FunctionT;
484 using ValueRefT = typename ContextT::ValueRefT;
485
486 using CycleInfoT = GenericCycleInfo<ContextT>;
487 using CycleT = typename CycleInfoT::CycleT;
488
489 using ModifiedPO = ModifiedPostOrder<ContextT>;
490 using SyncDependenceAnalysisT = GenericSyncDependenceAnalysis<ContextT>;
491 using DivergenceDescriptorT =
492 typename SyncDependenceAnalysisT::DivergenceDescriptor;
493 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
494
495 const ModifiedPO &CyclePOT;
496 const DominatorTreeT &DT;
497 const CycleInfoT &CI;
498 const BlockT &DivTermBlock;
499 const ContextT &Context;
500
501 // Track blocks that receive a new label. Every time we relabel a
502 // cycle header, we need another pass over the modified post-order in
503 // order to propagate the header label. The bit vector also allows
504 // us to skip labels that have not changed.
505 SparseBitVector<> FreshLabels;
506
507 // divergent join and cycle exit descriptor.
508 std::unique_ptr<DivergenceDescriptorT> DivDesc;
509 BlockLabelMapT &BlockLabels;
510
511 DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT,
512 const CycleInfoT &CI, const BlockT &DivTermBlock)
513 : CyclePOT(CyclePOT), DT(DT), CI(CI), DivTermBlock(DivTermBlock),
514 Context(CI.getSSAContext()), DivDesc(new DivergenceDescriptorT),
515 BlockLabels(DivDesc->BlockLabels) {}
516
517 void printDefs(raw_ostream &Out) {
518 Out << "Propagator::BlockLabels {\n";
519 for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
520 const auto *Block = CyclePOT[BlockIdx];
521 const auto *Label = BlockLabels[Block];
522 Out << Context.print(Block) << "(" << BlockIdx << ") : ";
523 if (!Label) {
524 Out << "<null>\n";
525 } else {
526 Out << Context.print(Label) << "\n";
527 }
528 }
529 Out << "}\n";
530 }
531
532 // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
533 // causes a divergent join.
534 bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
535 const auto *OldLabel = BlockLabels[&SuccBlock];
536
537 LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
538 << "\tpushed label: " << Context.print(&PushedLabel)
539 << "\n"
540 << "\told label: " << Context.print(OldLabel) << "\n");
541
542 // Early exit if there is no change in the label.
543 if (OldLabel == &PushedLabel)
544 return false;
545
546 if (OldLabel != &SuccBlock) {
547 auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
548 // Assigning a new label, mark this in FreshLabels.
549 LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
550 FreshLabels.set(SuccIdx);
551 }
552
553 // This is not a join if the succ was previously unlabeled.
554 if (!OldLabel) {
555 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
556 << "\n");
557 BlockLabels[&SuccBlock] = &PushedLabel;
558 return false;
559 }
560
561 // This is a new join. Label the join block as itself, and not as
562 // the pushed label.
563 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
564 BlockLabels[&SuccBlock] = &SuccBlock;
565
566 return true;
567 }
568
569 // visiting a virtual cycle exit edge from the cycle header --> temporal
570 // divergence on join
571 bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
572 if (!computeJoin(ExitBlock, Label))
573 return false;
574
575 // Identified a divergent cycle exit
576 DivDesc->CycleDivBlocks.insert(&ExitBlock);
577 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
578 << "\n");
579 return true;
580 }
581
582 // process \p SuccBlock with reaching definition \p Label
583 bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
584 if (!computeJoin(SuccBlock, Label))
585 return false;
586
587 // Divergent, disjoint paths join.
588 DivDesc->JoinDivBlocks.insert(&SuccBlock);
589 LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
590 << "\n");
591 return true;
592 }
593
594 std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
595 assert(DivDesc);
596
597 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
598 << Context.print(&DivTermBlock) << "\n");
599
600 // Early stopping criterion
601 int FloorIdx = CyclePOT.size() - 1;
602 const BlockT *FloorLabel = nullptr;
603 int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
604
605 // Bootstrap with branch targets
606 auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
607 for (const auto *SuccBlock : successors(&DivTermBlock)) {
608 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
609 // If DivTerm exits the cycle immediately, computeJoin() might
610 // not reach SuccBlock with a different label. We need to
611 // check for this exit now.
612 DivDesc->CycleDivBlocks.insert(SuccBlock);
613 LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
614 << Context.print(SuccBlock) << "\n");
615 }
616 auto SuccIdx = CyclePOT.getIndex(SuccBlock);
617 visitEdge(*SuccBlock, *SuccBlock);
618 FloorIdx = std::min<int>(FloorIdx, SuccIdx);
619 }
620
621 while (true) {
622 auto BlockIdx = FreshLabels.find_last();
623 if (BlockIdx == -1 || BlockIdx < FloorIdx)
624 break;
625
626 LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
627
628 FreshLabels.reset(BlockIdx);
629 if (BlockIdx == DivTermIdx) {
630 LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
631 continue;
632 }
633
634 const auto *Block = CyclePOT[BlockIdx];
635 LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
636 << BlockIdx << "\n");
637
638 const auto *Label = BlockLabels[Block];
639 assert(Label);
640
641 bool CausedJoin = false;
642 int LoweredFloorIdx = FloorIdx;
643
644 // If the current block is the header of a reducible cycle that
645 // contains the divergent branch, then the label should be
646 // propagated to the cycle exits. Such a header is the "last
647 // possible join" of any disjoint paths within this cycle. This
648 // prevents detection of spurious joins at the entries of any
649 // irreducible child cycles.
650 //
651 // This conclusion about the header is true for any choice of DFS:
652 //
653 // If some DFS has a reducible cycle C with header H, then for
654 // any other DFS, H is the header of a cycle C' that is a
655 // superset of C. For a divergent branch inside the subgraph
656 // C, any join node inside C is either H, or some node
657 // encountered without passing through H.
658 //
659 auto getReducibleParent = [&](const BlockT *Block) -> const CycleT * {
660 if (!CyclePOT.isReducibleCycleHeader(Block))
661 return nullptr;
662 const auto *BlockCycle = CI.getCycle(Block);
663 if (BlockCycle->contains(&DivTermBlock))
664 return BlockCycle;
665 return nullptr;
666 };
667
668 if (const auto *BlockCycle = getReducibleParent(Block)) {
669 SmallVector<BlockT *, 4> BlockCycleExits;
670 BlockCycle->getExitBlocks(BlockCycleExits);
671 for (auto *BlockCycleExit : BlockCycleExits) {
672 CausedJoin |= visitCycleExitEdge(*BlockCycleExit, *Label);
673 LoweredFloorIdx =
674 std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(BlockCycleExit));
675 }
676 } else {
677 for (const auto *SuccBlock : successors(Block)) {
678 CausedJoin |= visitEdge(*SuccBlock, *Label);
679 LoweredFloorIdx =
680 std::min<int>(LoweredFloorIdx, CyclePOT.getIndex(SuccBlock));
681 }
682 }
683
684 // Floor update
685 if (CausedJoin) {
686 // 1. Different labels pushed to successors
687 FloorIdx = LoweredFloorIdx;
688 } else if (FloorLabel != Label) {
689 // 2. No join caused BUT we pushed a label that is different than the
690 // last pushed label
691 FloorIdx = LoweredFloorIdx;
692 FloorLabel = Label;
693 }
694 }
695
696 LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
697
698 // Check every cycle containing DivTermBlock for exit divergence.
699 // A cycle has exit divergence if the label of an exit block does
700 // not match the label of its header.
701 for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
702 Cycle = Cycle->getParentCycle()) {
703 if (Cycle->isReducible()) {
704 // The exit divergence of a reducible cycle is recorded while
705 // propagating labels.
706 continue;
707 }
708 SmallVector<BlockT *, 4> Exits;
709 Cycle->getExitBlocks(Exits);
710 auto *Header = Cycle->getHeader();
711 auto *HeaderLabel = BlockLabels[Header];
712 for (const auto *Exit : Exits) {
713 if (BlockLabels[Exit] != HeaderLabel) {
714 // Identified a divergent cycle exit
715 DivDesc->CycleDivBlocks.insert(Exit);
716 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
717 << "\n");
718 }
719 }
720 }
721
722 return std::move(DivDesc);
723 }
724};
725
726template <typename ContextT>
727typename GenericSyncDependenceAnalysis<ContextT>::DivergenceDescriptor
728 GenericSyncDependenceAnalysis<ContextT>::EmptyDivergenceDesc;
729
730template <typename ContextT>
731GenericSyncDependenceAnalysis<ContextT>::GenericSyncDependenceAnalysis(
732 const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
733 : CyclePO(Context), DT(DT), CI(CI) {
734 CyclePO.compute(CI);
735}
736
737template <typename ContextT>
738auto GenericSyncDependenceAnalysis<ContextT>::getJoinBlocks(
739 const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
740 // trivial case
741 if (succ_size(DivTermBlock) <= 1) {
742 return EmptyDivergenceDesc;
743 }
744
745 // already available in cache?
746 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
747 if (ItCached != CachedControlDivDescs.end())
748 return *ItCached->second;
749
750 // compute all join points
751 DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
752 auto DivDesc = Propagator.computeJoinPoints();
753
754 auto printBlockSet = [&](ConstBlockSet &Blocks) {
755 return Printable([&](raw_ostream &Out) {
756 Out << "[";
757 ListSeparator LS;
758 for (const auto *BB : Blocks) {
759 Out << LS << CI.getSSAContext().print(BB);
760 }
761 Out << "]\n";
762 });
763 };
764
765 LLVM_DEBUG(
766 dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
767 << "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
768 << " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
769 << "\n");
770 (void)printBlockSet;
771
772 auto ItInserted =
773 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
774 assert(ItInserted.second);
775 return *ItInserted.first->second;
776}
777
778template <typename ContextT>
779bool GenericUniformityAnalysisImpl<ContextT>::markDivergent(
780 const InstructionT &I) {
781 if (I.isTerminator()) {
782 if (DivergentTermBlocks.insert(I.getParent()).second) {
783 LLVM_DEBUG(dbgs() << "marked divergent term block: "
784 << Context.print(I.getParent()) << "\n");
785 return true;
786 }
787 return false;
788 }
789
790 if (isAlwaysUniform(I))
791 return false;
792
793 return markDefsDivergent(I);
794}
795
796template <typename ContextT>
797bool GenericUniformityAnalysisImpl<ContextT>::markDivergent(
798 ConstValueRefT Val) {
799 if (DivergentValues.insert(Val).second) {
800 LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
801 return true;
802 }
803 return false;
804}
805
806template <typename ContextT>
807void GenericUniformityAnalysisImpl<ContextT>::addUniformOverride(
808 const InstructionT &Instr) {
809 UniformOverrides.insert(&Instr);
810}
811
812template <typename ContextT>
813void GenericUniformityAnalysisImpl<ContextT>::analyzeTemporalDivergence(
814 const InstructionT &I, const CycleT &OuterDivCycle) {
815 if (isDivergent(I))
816 return;
817
818 LLVM_DEBUG(dbgs() << "Analyze temporal divergence: " << Context.print(&I)
819 << "\n");
820 if (isAlwaysUniform(I))
821 return;
822
823 if (!usesValueFromCycle(I, OuterDivCycle))
824 return;
825
826 if (markDivergent(I))
827 Worklist.push_back(&I);
828}
829
830// Mark all external users of values defined inside \param
831// OuterDivCycle as divergent.
832//
833// This follows all live out edges wherever they may lead. Potential
834// users of values defined inside DivCycle could be anywhere in the
835// dominance region of DivCycle (including its fringes for phi nodes).
836// A cycle C dominates a block B iff every path from the entry block
837// to B must pass through a block contained in C. If C is a reducible
838// cycle (or natural loop), C dominates B iff the header of C
839// dominates B. But in general, we iteratively examine cycle
840// exits and their successors.
841template <typename ContextT>
842void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
843 const CycleT &OuterDivCycle) {
844 // Set of blocks that are dominated by the cycle, i.e., each is only
845 // reachable from paths that pass through the cycle.
846 SmallPtrSet<BlockT *, 16> DomRegion;
847
848 // The boundary of DomRegion, formed by blocks that are not
849 // dominated by the cycle.
850 SmallVector<BlockT *> DomFrontier;
851 OuterDivCycle.getExitBlocks(DomFrontier);
852
853 // Returns true if BB is dominated by the cycle.
854 auto isInDomRegion = [&](BlockT *BB) {
855 for (auto *P : predecessors(BB)) {
856 if (OuterDivCycle.contains(P))
857 continue;
858 if (DomRegion.count(P))
859 continue;
860 return false;
861 }
862 return true;
863 };
864
865 // Keep advancing the frontier along successor edges, while
866 // promoting blocks to DomRegion.
867 while (true) {
868 bool Promoted = false;
869 SmallVector<BlockT *> Temp;
870 for (auto *W : DomFrontier) {
871 if (!isInDomRegion(W)) {
872 Temp.push_back(W);
873 continue;
874 }
875 DomRegion.insert(W);
876 Promoted = true;
877 for (auto *Succ : successors(W)) {
878 if (DomRegion.contains(Succ))
879 continue;
880 Temp.push_back(Succ);
881 }
882 }
883 if (!Promoted)
884 break;
885
886 // Restore the set property for the temporary vector
887 llvm::sort(Temp);
888 Temp.erase(std::unique(Temp.begin(), Temp.end()), Temp.end());
889
890 DomFrontier = Temp;
891 }
892
893 // At DomFrontier, only the PHI nodes are affected by temporal
894 // divergence.
895 for (const auto *UserBlock : DomFrontier) {
896 LLVM_DEBUG(dbgs() << "Analyze phis after cycle exit: "
897 << Context.print(UserBlock) << "\n");
898 for (const auto &Phi : UserBlock->phis()) {
899 LLVM_DEBUG(dbgs() << " " << Context.print(&Phi) << "\n");
900 analyzeTemporalDivergence(Phi, OuterDivCycle);
901 }
902 }
903
904 // All instructions inside the dominance region are affected by
905 // temporal divergence.
906 for (const auto *UserBlock : DomRegion) {
907 LLVM_DEBUG(dbgs() << "Analyze non-phi users after cycle exit: "
908 << Context.print(UserBlock) << "\n");
909 for (const auto &I : *UserBlock) {
910 LLVM_DEBUG(dbgs() << " " << Context.print(&I) << "\n");
911 analyzeTemporalDivergence(I, OuterDivCycle);
912 }
913 }
914}
915
916template <typename ContextT>
917void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
918 const BlockT &DivExit, const CycleT &InnerDivCycle) {
919 LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
920 << "\n");
921 auto *DivCycle = &InnerDivCycle;
922 auto *OuterDivCycle = DivCycle;
923 auto *ExitLevelCycle = CI.getCycle(&DivExit);
924 const unsigned CycleExitDepth =
925 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
926
927 // Find outer-most cycle that does not contain \p DivExit
928 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
929 LLVM_DEBUG(dbgs() << " Found exiting cycle: "
930 << Context.print(DivCycle->getHeader()) << "\n");
931 OuterDivCycle = DivCycle;
932 DivCycle = DivCycle->getParentCycle();
933 }
934 LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
935 << Context.print(OuterDivCycle->getHeader()) << "\n");
936
937 if (!DivergentExitCycles.insert(OuterDivCycle).second)
938 return;
939
940 // Exit divergence does not matter if the cycle itself is assumed to
941 // be divergent.
942 for (const auto *C : AssumedDivergent) {
943 if (C->contains(OuterDivCycle))
944 return;
945 }
946
947 analyzeCycleExitDivergence(*OuterDivCycle);
948}
949
950template <typename ContextT>
951void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
952 const BlockT &BB) {
953 LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
954 for (const auto &I : instrs(BB)) {
955 // Terminators do not produce values; they are divergent only if
956 // the condition is divergent. That is handled when the divergent
957 // condition is placed in the worklist.
958 if (I.isTerminator())
959 break;
960
961 if (markDivergent(I))
962 Worklist.push_back(&I);
963 }
964}
965
966/// Mark divergent phi nodes in a join block
967template <typename ContextT>
968void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
969 const BlockT &JoinBlock) {
970 LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
971 << "\n");
972 for (const auto &Phi : JoinBlock.phis()) {
973 // FIXME: The non-undef value is not constant per se; it just happens to be
974 // uniform and may not dominate this PHI. So assuming that the same value
975 // reaches along all incoming edges may itself be undefined behaviour. This
976 // particular interpretation of the undef value was added to
977 // DivergenceAnalysis in the following review:
978 //
979 // https://reviews.llvm.org/D19013
980 if (ContextT::isConstantOrUndefValuePhi(Phi))
981 continue;
982 if (markDivergent(Phi))
983 Worklist.push_back(&Phi);
984 }
985}
986
987/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
988///
989/// \return true iff \p Candidate was added to \p Cycles.
990template <typename CycleT>
991static bool insertIfNotContained(SmallVector<CycleT *> &Cycles,
992 CycleT *Candidate) {
993 if (llvm::any_of(Cycles,
994 [Candidate](CycleT *C) { return C->contains(Candidate); }))
995 return false;
996 Cycles.push_back(Candidate);
997 return true;
998}
999
1000/// Return the outermost cycle made divergent by a branch outside it.
1001///
1002/// If two paths that diverged outside an irreducible cycle join
1003/// inside that cycle, then that whole cycle is assumed to be
1004/// divergent. This does not apply if the cycle is reducible.
1005template <typename CycleT, typename BlockT>
1006static const CycleT *getExtDivCycle(const CycleT *Cycle,
1007 const BlockT *DivTermBlock,
1008 const BlockT *JoinBlock) {
1009 assert(Cycle);
1010 assert(Cycle->contains(JoinBlock));
1011
1012 if (Cycle->contains(DivTermBlock))
1013 return nullptr;
1014
1015 if (Cycle->isReducible()) {
1016 assert(Cycle->getHeader() == JoinBlock);
1017 return nullptr;
1018 }
1019
1020 const auto *Parent = Cycle->getParentCycle();
1021 while (Parent && !Parent->contains(DivTermBlock)) {
1022 // If the join is inside a child, then the parent must be
1023 // irreducible. The only join in a reducible cycle is its own
1024 // header.
1025 assert(!Parent->isReducible());
1026 Cycle = Parent;
1027 Parent = Cycle->getParentCycle();
1028 }
1029
1030 LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
1031 return Cycle;
1032}
1033
1034/// Return the outermost cycle made divergent by a branch inside it.
1035///
1036/// This checks the "diverged entry" criterion defined in the
1037/// docs/ConvergenceAnalysis.html.
1038template <typename ContextT, typename CycleT, typename BlockT,
1039 typename DominatorTreeT>
1040static const CycleT *
1041getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1042 const BlockT *JoinBlock, const DominatorTreeT &DT,
1043 ContextT &Context) {
1044 LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
1045 << " for internal branch " << Context.print(DivTermBlock)
1046 << "\n");
1047 if (DT.properlyDominates(DivTermBlock, JoinBlock))
1048 return nullptr;
1049
1050 // Find the smallest common cycle, if one exists.
1051 assert(Cycle && Cycle->contains(JoinBlock));
1052 while (Cycle && !Cycle->contains(DivTermBlock)) {
1053 Cycle = Cycle->getParentCycle();
1054 }
1055 if (!Cycle || Cycle->isReducible())
1056 return nullptr;
1057
1058 if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
1059 return nullptr;
1060
1061 LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
1062 << " does not dominate join\n");
1063
1064 const auto *Parent = Cycle->getParentCycle();
1065 while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
1066 LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
1067 << " does not dominate join\n");
1068 Cycle = Parent;
1069 Parent = Parent->getParentCycle();
1070 }
1071
1072 LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
1073 return Cycle;
1074}
1075
1076template <typename ContextT, typename CycleT, typename BlockT,
1077 typename DominatorTreeT>
1078static const CycleT *
1079getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
1080 const BlockT *JoinBlock, const DominatorTreeT &DT,
1081 ContextT &Context) {
1082 if (!Cycle)
1083 return nullptr;
1084
1085 // First try to expand Cycle to the largest that contains JoinBlock
1086 // but not DivTermBlock.
1087 const auto *Ext = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);
1088
1089 // Continue expanding to the largest cycle that contains both.
1090 const auto *Int = getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);
1091
1092 if (Int)
1093 return Int;
1094 return Ext;
1095}
1096
1097template <typename ContextT>
1098bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1099 const BlockT &ObservingBlock, const InstructionT &Def) const {
1100 const BlockT *DefBlock = Def.getParent();
1101 for (const CycleT *Cycle = CI.getCycle(DefBlock);
1102 Cycle && !Cycle->contains(&ObservingBlock);
1103 Cycle = Cycle->getParentCycle()) {
1104 if (DivergentExitCycles.contains(Cycle)) {
1105 return true;
1106 }
1107 }
1108 return false;
1109}
1110
1111template <typename ContextT>
1112void GenericUniformityAnalysisImpl<ContextT>::analyzeControlDivergence(
1113 const InstructionT &Term) {
1114 const auto *DivTermBlock = Term.getParent();
1115 DivergentTermBlocks.insert(DivTermBlock);
1116 LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1117 << "\n");
1118
1119 // Don't propagate divergence from unreachable blocks.
1120 if (!DT.isReachableFromEntry(DivTermBlock))
1121 return;
1122
1123 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1124 SmallVector<const CycleT *> DivCycles;
1125
1126 // Iterate over all blocks now reachable by a disjoint path join
1127 for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1128 const auto *Cycle = CI.getCycle(JoinBlock);
1129 LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1130 << "\n");
1131 if (const auto *Outermost = getOutermostDivergentCycle(
1132 Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1133 LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1134 DivCycles.push_back(Outermost);
1135 continue;
1136 }
1137 taintAndPushPhiNodes(*JoinBlock);
1138 }
1139
1140 // Sort by order of decreasing depth. This allows later cycles to be skipped
1141 // because they are already contained in earlier ones.
1142 llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1143 return A->getDepth() > B->getDepth();
1144 });
1145
1146 // Cycles that are assumed divergent due to the diverged entry
1147 // criterion potentially contain temporal divergence depending on
1148 // the DFS chosen. Conservatively, all values produced in such a
1149 // cycle are assumed divergent. "Cycle invariant" values may be
1150 // assumed uniform, but that requires further analysis.
1151 for (auto *C : DivCycles) {
1152 if (!insertIfNotContained(AssumedDivergent, C))
1153 continue;
1154 LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1155 for (const BlockT *BB : C->blocks()) {
1156 taintAndPushAllDefs(*BB);
1157 }
1158 }
1159
1160 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1161 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1162 for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1163 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1164 }
1165}
1166
1167template <typename ContextT>
1168void GenericUniformityAnalysisImpl<ContextT>::compute() {
1169 // Initialize worklist.
1170 auto DivValuesCopy = DivergentValues;
1171 for (const auto DivVal : DivValuesCopy) {
1172 assert(isDivergent(DivVal) && "Worklist invariant violated!");
1173 pushUsers(DivVal);
1174 }
1175
1176 // All values on the Worklist are divergent.
1177 // Their users may not have been updated yet.
1178 while (!Worklist.empty()) {
1179 const InstructionT *I = Worklist.back();
1180 Worklist.pop_back();
1181
1182 LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1183
1184 if (I->isTerminator()) {
1185 analyzeControlDivergence(*I);
1186 continue;
1187 }
1188
1189 // propagate value divergence to users
1190 assert(isDivergent(*I) && "Worklist invariant violated!");
1191 pushUsers(*I);
1192 }
1193}
1194
1195template <typename ContextT>
1196bool GenericUniformityAnalysisImpl<ContextT>::isAlwaysUniform(
1197 const InstructionT &Instr) const {
1198 return UniformOverrides.contains(&Instr);
1199}
1200
1201template <typename ContextT>
1202GenericUniformityInfo<ContextT>::GenericUniformityInfo(
1203 FunctionT &Func, const DominatorTreeT &DT, const CycleInfoT &CI,
1204 const TargetTransformInfo *TTI)
1205 : F(&Func) {
1206 DA.reset(new ImplT{Func, DT, CI, TTI});
1207 DA->initialize();
1208 DA->compute();
1209}
1210
1211template <typename ContextT>
1212void GenericUniformityAnalysisImpl<ContextT>::print(raw_ostream &OS) const {
1213 bool haveDivergentArgs = false;
1214
1215 if (DivergentValues.empty()) {
1216 assert(DivergentTermBlocks.empty());
1217 assert(DivergentExitCycles.empty());
1218 OS << "ALL VALUES UNIFORM\n";
1219 return;
1220 }
1221
1222 for (const auto &entry : DivergentValues) {
1223 const BlockT *parent = Context.getDefBlock(entry);
1224 if (!parent) {
1225 if (!haveDivergentArgs) {
1226 OS << "DIVERGENT ARGUMENTS:\n";
1227 haveDivergentArgs = true;
1228 }
1229 OS << " DIVERGENT: " << Context.print(entry) << '\n';
1230 }
1231 }
1232
1233 if (!AssumedDivergent.empty()) {
1234 OS << "CYCLES ASSUMED DIVERGENT:\n";
1235 for (const CycleT *cycle : AssumedDivergent) {
1236 OS << " " << cycle->print(Context) << '\n';
1237 }
1238 }
1239
1240 if (!DivergentExitCycles.empty()) {
1241 OS << "CYCLES WITH DIVERGENT EXIT:\n";
1242 for (const CycleT *cycle : DivergentExitCycles) {
1243 OS << " " << cycle->print(Context) << '\n';
1244 }
1245 }
1246
1247 for (auto &block : F) {
1248 OS << "\nBLOCK " << Context.print(&block) << '\n';
1249
1250 OS << "DEFINITIONS\n";
1251 SmallVector<ConstValueRefT, 16> defs;
1252 Context.appendBlockDefs(defs, block);
1253 for (auto value : defs) {
1254 if (isDivergent(value))
1255 OS << " DIVERGENT: ";
1256 else
1257 OS << " ";
1258 OS << Context.print(value) << '\n';
1259 }
1260
1261 OS << "TERMINATORS\n";
1262 SmallVector<const InstructionT *, 8> terms;
1263 Context.appendBlockTerms(terms, block);
1264 bool divergentTerminators = hasDivergentTerminator(block);
1265 for (auto *T : terms) {
1266 if (divergentTerminators)
1267 OS << " DIVERGENT: ";
1268 else
1269 OS << " ";
1270 OS << Context.print(T) << '\n';
1271 }
1272
1273 OS << "END BLOCK\n";
1274 }
1275}
1276
1277template <typename ContextT>
1278bool GenericUniformityInfo<ContextT>::hasDivergence() const {
1279 return DA->hasDivergence();
1280}
1281
1282/// Whether \p V is divergent at its definition.
1283template <typename ContextT>
1284bool GenericUniformityInfo<ContextT>::isDivergent(ConstValueRefT V) const {
1285 return DA->isDivergent(V);
1286}
1287
1288template <typename ContextT>
1289bool GenericUniformityInfo<ContextT>::isDivergent(const InstructionT *I) const {
1290 return DA->isDivergent(*I);
1291}
1292
1293template <typename ContextT>
1294bool GenericUniformityInfo<ContextT>::isDivergentUse(const UseT &U) const {
1295 return DA->isDivergentUse(U);
1296}
1297
1298template <typename ContextT>
1299bool GenericUniformityInfo<ContextT>::hasDivergentTerminator(const BlockT &B) {
1300 return DA->hasDivergentTerminator(B);
1301}
1302
1303/// \brief T helper function for printing.
1304template <typename ContextT>
1305void GenericUniformityInfo<ContextT>::print(raw_ostream &out) const {
1306 DA->print(out);
1307}
1308
1309template <typename ContextT>
1310void ModifiedPostOrder<ContextT>::computeStackPO(
1311 SmallVectorImpl<BlockT *> &Stack, const CycleInfoT &CI, const CycleT *Cycle,
1312 SmallPtrSetImpl<BlockT *> &Finalized) {
1313 LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
1314 while (!Stack.empty()) {
1315 auto *NextBB = Stack.back();
1316 if (Finalized.count(NextBB)) {
1317 Stack.pop_back();
1318 continue;
1319 }
1320 LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
1321 << "\n");
1322 auto *NestedCycle = CI.getCycle(NextBB);
1323 if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
1324 LLVM_DEBUG(dbgs() << " found a cycle\n");
1325 while (NestedCycle->getParentCycle() != Cycle)
1326 NestedCycle = NestedCycle->getParentCycle();
1327
1328 SmallVector<BlockT *, 3> NestedExits;
1329 NestedCycle->getExitBlocks(NestedExits);
1330 bool PushedNodes = false;
1331 for (auto *NestedExitBB : NestedExits) {
1332 LLVM_DEBUG(dbgs() << " examine exit: "
1333 << CI.getSSAContext().print(NestedExitBB) << "\n");
1334 if (Cycle && !Cycle->contains(NestedExitBB))
1335 continue;
1336 if (Finalized.count(NestedExitBB))
1337 continue;
1338 PushedNodes = true;
1339 Stack.push_back(NestedExitBB);
1340 LLVM_DEBUG(dbgs() << " pushed exit: "
1341 << CI.getSSAContext().print(NestedExitBB) << "\n");
1342 }
1343 if (!PushedNodes) {
1344 // All loop exits finalized -> finish this node
1345 Stack.pop_back();
1346 computeCyclePO(CI, NestedCycle, Finalized);
1347 }
1348 continue;
1349 }
1350
1351 LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
1352 // DAG-style
1353 bool PushedNodes = false;
1354 for (auto *SuccBB : successors(NextBB)) {
1355 LLVM_DEBUG(dbgs() << " examine succ: "
1356 << CI.getSSAContext().print(SuccBB) << "\n");
1357 if (Cycle && !Cycle->contains(SuccBB))
1358 continue;
1359 if (Finalized.count(SuccBB))
1360 continue;
1361 PushedNodes = true;
1362 Stack.push_back(SuccBB);
1363 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
1364 << "\n");
1365 }
1366 if (!PushedNodes) {
1367 // Never push nodes twice
1368 LLVM_DEBUG(dbgs() << " finishing node: "
1369 << CI.getSSAContext().print(NextBB) << "\n");
1370 Stack.pop_back();
1371 Finalized.insert(NextBB);
1372 appendBlock(*NextBB);
1373 }
1374 }
1375 LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
1376}
1377
1378template <typename ContextT>
1379void ModifiedPostOrder<ContextT>::computeCyclePO(
1380 const CycleInfoT &CI, const CycleT *Cycle,
1381 SmallPtrSetImpl<BlockT *> &Finalized) {
1382 LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1383 SmallVector<BlockT *> Stack;
1384 auto *CycleHeader = Cycle->getHeader();
1385
1386 LLVM_DEBUG(dbgs() << " noted header: "
1387 << CI.getSSAContext().print(CycleHeader) << "\n");
1388 assert(!Finalized.count(CycleHeader));
1389 Finalized.insert(CycleHeader);
1390
1391 // Visit the header last
1392 LLVM_DEBUG(dbgs() << " finishing header: "
1393 << CI.getSSAContext().print(CycleHeader) << "\n");
1394 appendBlock(*CycleHeader, Cycle->isReducible());
1395
1396 // Initialize with immediate successors
1397 for (auto *BB : successors(CycleHeader)) {
1398 LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
1399 << "\n");
1400 if (!Cycle->contains(BB))
1401 continue;
1402 if (BB == CycleHeader)
1403 continue;
1404 if (!Finalized.count(BB)) {
1405 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
1406 << "\n");
1407 Stack.push_back(BB);
1408 }
1409 }
1410
1411 // Compute PO inside region
1412 computeStackPO(Stack, CI, Cycle, Finalized);
1413
1414 LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1415}
1416
1417/// \brief Generically compute the modified post order.
1418template <typename ContextT>
1419void ModifiedPostOrder<ContextT>::compute(const CycleInfoT &CI) {
1420 SmallPtrSet<BlockT *, 32> Finalized;
1421 SmallVector<BlockT *> Stack;
1422 auto *F = CI.getFunction();
1423 Stack.reserve(24); // FIXME made-up number
1424 Stack.push_back(GraphTraits<FunctionT *>::getEntryNode(F));
1425 computeStackPO(Stack, CI, nullptr, Finalized);
1426}
1427
1428} // namespace llvm
1429
1430#undef DEBUG_TYPE
1431
1432#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H