LLVM 23.0.0git
GenericUniformityImpl.h
Go to the documentation of this file.
1//===- GenericUniformityImpl.h -----------------------*- C++ -*------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This template implementation resides in a separate file so that it
10// does not get injected into every .cpp file that includes the
11// generic header.
12//
13// DO NOT INCLUDE THIS FILE WHEN MERELY USING UNIFORMITYINFO.
14//
15// This file should only be included by files that implement a
16// specialization of the relevant templates. Currently these are:
17// - UniformityAnalysis.cpp
18//
19// Note: The DEBUG_TYPE macro should be defined before using this
20// file so that any use of LLVM_DEBUG is associated with the
21// including file rather than this file.
22//
23//===----------------------------------------------------------------------===//
24///
25/// \file
26/// \brief Implementation of uniformity analysis.
27///
28/// The algorithm is a fixed point iteration that starts with the assumption
29/// that all control flow and all values are uniform. Starting from sources of
30/// divergence (whose discovery must be implemented by a CFG- or even
31/// target-specific derived class), divergence of values is propagated from
32/// definition to uses in a straight-forward way. The main complexity lies in
33/// the propagation of the impact of divergent control flow on the divergence of
34/// values (sync dependencies).
35///
36/// NOTE: In general, no interface exists for a transform to update
37/// (Machine)UniformityInfo. Additionally, (Machine)CycleAnalysis is a
38/// transitive dependence, but it also does not provide an interface for
39/// updating itself. Given that, transforms should not preserve uniformity in
40/// their getAnalysisUsage() callback.
41///
42//===----------------------------------------------------------------------===//
43
44#ifndef LLVM_ADT_GENERICUNIFORMITYIMPL_H
45#define LLVM_ADT_GENERICUNIFORMITYIMPL_H
46
48
49#include "llvm/ADT/DenseSet.h"
50#include "llvm/ADT/STLExtras.h"
55
56#define DEBUG_TYPE "uniformity"
57
58namespace llvm {
59
60// Forward decl from llvm/CodeGen/MachineInstr.h
61class MachineInstr;
62
63/// Construct a specially modified post-order traversal of cycles.
64///
65/// The ModifiedPO is constructed using a virtually modified CFG as follows:
66///
67/// 1. The successors of pre-entry nodes (predecessors of a cycle
68/// entry that are outside the cycle) are replaced by the
69/// successors of the successors of the header.
70/// 2. Successors of the cycle header are replaced by the exit blocks
71/// of the cycle.
72///
73/// Effectively, we produce a depth-first numbering with the following
74/// properties:
75///
76/// 1. Nodes after a cycle are numbered earlier than the cycle header.
77/// 2. The header is numbered earlier than the nodes in the cycle.
78/// 3. The numbering of the nodes within the cycle forms an interval
79/// starting with the header.
80///
81/// Effectively, the virtual modification arranges the nodes in a
82/// cycle as a DAG with the header as the sole leaf, and successors of
83/// the header as the roots. A reverse traversal of this numbering has
84/// the following invariant on the unmodified original CFG:
85///
86/// Each node is visited after all its predecessors, except if that
87/// predecessor is the cycle header.
88///
89template <typename ContextT> class ModifiedPostOrder {
90public:
91 using BlockT = typename ContextT::BlockT;
92 using FunctionT = typename ContextT::FunctionT;
93 using DominatorTreeT = typename ContextT::DominatorTreeT;
94
// NOTE(review): the CycleInfoT alias (original line 95) was dropped by the
// extraction — presumably `using CycleInfoT = GenericCycleInfo<ContextT>;`;
// TODO confirm against upstream.
96 using CycleT = typename CycleInfoT::CycleT;
97 using const_iterator = typename std::vector<BlockT *>::const_iterator;
98
99 ModifiedPostOrder(const ContextT &C) : Context(C) {}
100
// Container-style accessors over the computed ordering.
101 bool empty() const { return m_order.empty(); }
102 size_t size() const { return m_order.size(); }
103
104 void clear() { m_order.clear(); }
105 void compute(const CycleInfoT &CI);
106
107 unsigned count(BlockT *BB) const { return POIndex.count(BB); }
108 const BlockT *operator[](size_t idx) const { return m_order[idx]; }
109
// Append \p BB as the next block in the modified post-order and record its
// index for later lookup via getIndex().
110 void appendBlock(const BlockT &BB, bool isReducibleCycleHeader = false) {
111 POIndex[&BB] = m_order.size();
112 m_order.push_back(&BB);
113 LLVM_DEBUG(dbgs() << "ModifiedPO(" << POIndex[&BB]
114 << "): " << Context.print(&BB) << "\n");
// NOTE(review): the guard `if (isReducibleCycleHeader)` (original line 115)
// is missing from this extraction; upstream, the insert below is conditional
// on that parameter — TODO restore.
116 ReducibleCycleHeaders.insert(&BB);
117 }
118
// Return the index previously recorded for \p BB; asserts that the block
// has already been appended.
119 unsigned getIndex(const BlockT *BB) const {
120 assert(POIndex.count(BB));
121 return POIndex.lookup(BB);
122 }
123
124 bool isReducibleCycleHeader(const BlockT *BB) const {
125 return ReducibleCycleHeaders.contains(BB);
126 }
127
128private:
// NOTE(review): the declarations of m_order (the block sequence) and POIndex
// (block -> index map), original lines 129-130, were lost in extraction.
131 SmallPtrSet<const BlockT *, 32> ReducibleCycleHeaders;
132 const ContextT &Context;
133
// Helpers used by compute(); their trailing parameter lines (original lines
// 135 and 139) were lost in extraction.
134 void computeCyclePO(const CycleInfoT &CI, const CycleT *Cycle,
136
137 void computeStackPO(SmallVectorImpl<const BlockT *> &Stack,
138 const CycleInfoT &CI, const CycleT *Cycle,
140};
141
142template <typename> class DivergencePropagator;
143
144/// \class GenericSyncDependenceAnalysis
145///
146/// \brief Locate join blocks for disjoint paths starting at a divergent branch.
147///
148/// An analysis per divergent branch that returns the set of basic
149/// blocks whose phi nodes become divergent due to divergent control.
150/// These are the blocks that are reachable by two disjoint paths from
151/// the branch, or cycle exits reachable along a path that is disjoint
152/// from a path to the cycle latch.
153
154// --- Above line is not a doxygen comment; intentionally left blank ---
155//
156// Originally implemented in SyncDependenceAnalysis.cpp for DivergenceAnalysis.
157//
158// The SyncDependenceAnalysis is used in the UniformityAnalysis to model
159// control-induced divergence in phi nodes.
160//
161// -- Reference --
162// The algorithm is an extension of Section 5 of
163//
164// An abstract interpretation for SPMD divergence
165// on reducible control flow graphs.
166// Julian Rosemann, Simon Moll and Sebastian Hack
167// POPL '21
168//
169//
170// -- Sync dependence --
171// Sync dependence characterizes the control flow aspect of the
172// propagation of branch divergence. For example,
173//
174// %cond = icmp slt i32 %tid, 10
175// br i1 %cond, label %then, label %else
176// then:
177// br label %merge
178// else:
179// br label %merge
180// merge:
181// %a = phi i32 [ 0, %then ], [ 1, %else ]
182//
183// Suppose %tid holds the thread ID. Although %a is not data dependent on %tid
184// because %tid is not on its use-def chains, %a is sync dependent on %tid
185// because the branch "br i1 %cond" depends on %tid and affects which value %a
186// is assigned to.
187//
188//
189// -- Reduction to SSA construction --
190// There are two disjoint paths from A to X, if a certain variant of SSA
191// construction places a phi node in X under the following set-up scheme.
192//
193// This variant of SSA construction ignores incoming undef values.
194// That is paths from the entry without a definition do not result in
195// phi nodes.
196//
197// entry
198// / \
199// A \
200// / \ Y
201// B C /
202// \ / \ /
203// D E
204// \ /
205// F
206//
207// Assume that A contains a divergent branch. We are interested
208// in the set of all blocks where each block is reachable from A
209// via two disjoint paths. This would be the set {D, F} in this
210// case.
211// To generally reduce this query to SSA construction we introduce
212// a virtual variable x and assign to x different values in each
213// successor block of A.
214//
215// entry
216// / \
217// A \
218// / \ Y
219// x = 0 x = 1 /
220// \ / \ /
221// D E
222// \ /
223// F
224//
225// Our flavor of SSA construction for x will construct the following
226//
227// entry
228// / \
229// A \
230// / \ Y
231// x0 = 0 x1 = 1 /
232// \ / \ /
233// x2 = phi E
234// \ /
235// x3 = phi
236//
237// The blocks D and F contain phi nodes and are thus each reachable
238// by two disjoint paths from A.
239//
240// -- Remarks --
241// * In case of cycle exits we need to check for temporal divergence.
242// To this end, we check whether the definition of x differs between the
243// cycle exit and the cycle header (_after_ SSA construction).
244//
245// * In the presence of irreducible control flow, the fixed point is
246// reached only after multiple iterations. This is because labels
247// reaching the header of a cycle must be repropagated through the
248// cycle. This is true even in a reducible cycle, since the labels
249// may have been produced by a nested irreducible cycle.
250//
251// * Note that SyncDependenceAnalysis is not concerned with the points
252// of convergence in an irreducible cycle. Its only purpose is to
253// identify join blocks. The "diverged entry" criterion is
254// separately applied on join blocks to determine if an entire
255// irreducible cycle is assumed to be divergent.
256//
257// * Relevant related work:
258// A simple algorithm for global data flow analysis problems.
259// Matthew S. Hecht and Jeffrey D. Ullman.
260// SIAM Journal on Computing, 4(4):519–532, December 1975.
261//
// NOTE(review): this extraction dropped several original lines of this class
// (aliases around 270/273-275, the BlockLabelMap declaration near 283, the
// DivergenceDescriptor field declarations at 287/289/291/293, an alias near
// 296, and the cache-map declaration start at 320). Gaps are visible where
// the embedded line numbers jump — TODO restore from upstream.
262template <typename ContextT> class GenericSyncDependenceAnalysis {
263public:
264 using BlockT = typename ContextT::BlockT;
265 using DominatorTreeT = typename ContextT::DominatorTreeT;
266 using FunctionT = typename ContextT::FunctionT;
267 using ValueRefT = typename ContextT::ValueRefT;
268 using InstructionT = typename ContextT::InstructionT;
269
271 using CycleT = typename CycleInfoT::CycleT;
272
275
// Semantics of the per-branch block-label map used during propagation:
276 // * if BlockLabels[B] == C then C is the dominating definition at
277 // block B
278 // * if BlockLabels[B] == nullptr then we haven't seen B yet
279 // * if BlockLabels[B] == B then:
280 // - B is a join point of disjoint paths from X, or,
281 // - B is an immediate successor of X (initial value), or,
282 // - B is X
284
285 /// Information discovered by the sync dependence analysis for each
286 /// divergent branch.
288 // Join points of diverged paths.
290 // Divergent cycle exits
292 // Labels assigned to blocks on diverged paths.
294 };
295
297
298 GenericSyncDependenceAnalysis(const ContextT &Context,
299 const DominatorTreeT &DT, const CycleInfoT &CI);
300
301 /// \brief Computes divergent join points and cycle exits caused by branch
302 /// divergence in \p Term.
303 ///
304 /// This returns a pair of sets:
305 /// * The set of blocks which are reachable by disjoint paths from
306 /// \p Term.
307 /// * The set also contains cycle exits if there are two disjoint paths:
308 /// one from \p Term to the cycle exit and another from \p Term to
309 /// the cycle header.
310 const DivergenceDescriptor &getJoinBlocks(const BlockT *DivTermBlock);
311
312private:
// Shared empty result returned for branches with at most one successor.
313 static inline DivergenceDescriptor EmptyDivergenceDesc;
314
// Modified post-order over cycles; computed once in the constructor and
// reused for every queried branch.
315 ModifiedPO CyclePO;
316
317 const DominatorTreeT &DT;
318 const CycleInfoT &CI;
319
// Memoized per-branch results (map declaration start lost in extraction).
321 CachedControlDivDescs;
322};
323
324/// \brief Analysis that identifies uniform values in a data-parallel
325/// execution.
326///
327/// This analysis propagates divergence in a data-parallel context
328/// from sources of divergence to all users. It can be instantiated
329/// for an IR that provides a suitable SSAContext.
// NOTE(review): this extraction dropped a number of original lines in this
// class (aliases near 340/343-344/348, the constructor signature 351-352, a
// destructor or similar at 356, the `isDivergent(ConstValueRefT)` signature
// lines 396-397, accessor declarations near 420-422 and 430, and member
// declarations at 435, 441, 443, 448, 455, 474 and 477). Gaps are visible
// where the embedded line numbers jump — TODO restore from upstream.
330template <typename ContextT> class GenericUniformityAnalysisImpl {
331public:
332 using BlockT = typename ContextT::BlockT;
333 using FunctionT = typename ContextT::FunctionT;
334 using ValueRefT = typename ContextT::ValueRefT;
335 using ConstValueRefT = typename ContextT::ConstValueRefT;
336 using UseT = typename ContextT::UseT;
337 using InstructionT = typename ContextT::InstructionT;
338 using DominatorTreeT = typename ContextT::DominatorTreeT;
339
341 using CycleT = typename CycleInfoT::CycleT;
342
345 typename SyncDependenceAnalysisT::DivergenceDescriptor;
346 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
347
// Tuple describing a temporally-divergent use: (value, user, defining cycle).
349 std::tuple<ConstValueRefT, InstructionT *, const CycleT *>;
350
// Constructor initializer list; the signature lines (351-352) were lost in
// extraction.
353 : Context(CI.getSSAContext()), F(*Context.getFunction()), CI(CI),
354 TTI(TTI), DT(DT), SDA(Context, DT, CI) {}
355

357
358 const FunctionT &getFunction() const { return F; }
359
360 /// \brief Mark \p UniVal as a value that is always uniform.
361 void addUniformOverride(const InstructionT &Instr);
362
363 /// \brief Examine \p I for divergent outputs and add to the worklist.
364 void markDivergent(const InstructionT &I);
365
366 /// \brief Mark \p DivVal as a divergent value by removing it from
367 /// UniformValues. \returns Whether the tracked divergence state of
368 /// \p DivVal changed.
369 bool markDivergent(ConstValueRefT DivVal);
370
371 /// \brief Mark outputs of \p Instr as divergent.
372 /// \returns Whether the tracked divergence state of any output has changed.
373 bool markDefsDivergent(const InstructionT &Instr);
374
375 /// \brief Propagate divergence to all instructions in the region.
376 /// Divergence is seeded by calls to \p markDivergent.
377 void compute();
378
379 /// \brief Whether \p Val will always return a uniform value regardless of its
380 /// operands
381 bool isAlwaysUniform(const InstructionT &Instr) const;
382
383 bool hasDivergentDefs(const InstructionT &I) const;
384
// Terminators define no values; their divergence is tracked per-block in
// DivergentTermBlocks instead of per-def.
385 bool isDivergent(const InstructionT &I) const {
386 if (I.isTerminator()) {
387 return DivergentTermBlocks.contains(I.getParent());
388 }
389 return hasDivergentDefs(I);
390 };
391
392 /// \brief Whether \p Val is divergent at its definition.
393 /// When the target has no branch divergence, compute() is never called
394 /// and everything is uniform. Otherwise, values not in UniformValues
395 /// (e.g. newly created) are conservatively treated as divergent.
// NOTE(review): the signature of this function and its first guard (original
// lines 396-397) were lost in extraction; the `return false;` below is the
// tail of that missing no-branch-divergence early-out.
398 return false;
399 // Only values that were present during analysis are tracked in
400 // UniformValues (Instructions/Arguments for IR, Registers for MIR).
401 // Other values (e.g. constants, globals) are always uniform but are
402 // not added to UniformValues; this check avoids false divergence.
403 if (ContextT::isAlwaysUniform(V))
404 return false;
405 return !UniformValues.contains(V);
406 }
407
408 bool isDivergentUse(const UseT &U) const;
409
410 bool hasDivergentTerminator(const BlockT &B) const {
411 return DivergentTermBlocks.contains(&B);
412 }
413
414 void print(raw_ostream &out) const;
415
416 /// Print divergent arguments and return true if any were found.
417 /// IR specialization iterates F.args(); default is a no-op.
418 bool printDivergentArgs(raw_ostream &out) const;
419

421
// Remnant of a dropped declaration (original line 422 missing).
423 const CycleT *);
424
425 /// Check if an instruction with Custom uniformity can be proven uniform
426 /// based on its operands. This queries the target-specific callback.
427 bool isCustomUniform(const InstructionT &I) const;
428
429 /// \brief Add an instruction that requires custom uniformity analysis.
431
432protected:
433 const ContextT &Context;
434 const FunctionT &F;
436 const TargetTransformInfo *TTI = nullptr;
437
438 // Whether the target has branch divergence. Set at the start of compute(),
439 // which is only called when the target has branch divergence. When false,
440 // isDivergent() returns false for all values.
442

444
445 // Values known to be uniform. Populated in initialize() with all values,
446 // then values are removed as divergence is propagated. After analysis,
447 // values not in this set are conservatively treated as divergent.
449
450 // Internal worklist for divergence propagation.
451 std::vector<const InstructionT *> Worklist;
452
453 // Set of instructions that require custom uniformity analysis based on
454 // operand uniformity.
456
457 /// \brief Mark \p Term as divergent and push all Instructions that become
458 /// divergent as a result on the worklist.
459 void analyzeControlDivergence(const InstructionT &Term);
460
461private:
462 const DominatorTreeT &DT;
463
464 // Recognized cycles with divergent exits.
465 SmallPtrSet<const CycleT *, 16> DivergentExitCycles;
466
467 // Cycles assumed to be divergent.
468 //
469 // We don't use a set here because every insertion needs an explicit
470 // traversal of all existing members.
471 SmallVector<const CycleT *> AssumedDivergent;
472
473 // The SDA links divergent branches to divergent control-flow joins.
475
476 // Set of known-uniform values.
478
479 /// \brief Mark all nodes in \p JoinBlock as divergent and push them on
480 /// the worklist.
481 void taintAndPushAllDefs(const BlockT &JoinBlock);
482
483 /// \brief Mark all phi nodes in \p JoinBlock as divergent and push them on
484 /// the worklist.
485 void taintAndPushPhiNodes(const BlockT &JoinBlock);
486
487 /// \brief Identify all Instructions that become divergent because \p DivExit
488 /// is a divergent cycle exit of \p DivCycle. Mark those instructions as
489 /// divergent and push them on the worklist.
490 void propagateCycleExitDivergence(const BlockT &DivExit,
491 const CycleT &DivCycle);
492
493 /// Mark as divergent all external uses of values defined in \p DefCycle.
494 void analyzeCycleExitDivergence(const CycleT &DefCycle);
495
496 /// \brief Mark as divergent all uses of \p I that are outside \p DefCycle.
497 void propagateTemporalDivergence(const InstructionT &I,
498 const CycleT &DefCycle);
499
500 /// \brief Push all users of \p Val (in the region) to the worklist.
501 void pushUsers(const InstructionT &I);
502 void pushUsers(ConstValueRefT V);
503
504 bool usesValueFromCycle(const InstructionT &I, const CycleT &DefCycle) const;
505
506 /// \brief Whether \p Def is divergent when read in \p ObservingBlock.
507 bool isTemporalDivergent(const BlockT &ObservingBlock,
508 const InstructionT &Def) const;
509};
510
511template <typename ImplT>
515
516/// Compute divergence starting with a divergent branch.
// NOTE(review): this extraction dropped several original lines of this class:
// aliases near 524/527-529, the reference members (CyclePOT, DT, CI,
// DivTermBlock) at 533-536, the FreshLabels bit-vector declaration at 543,
// the BlockLabels map at 547, the constructor at 549-553, the printDefs()
// signature at 555, and two statements inside computeJoinPoints() (633, 749).
// Gaps are visible where the embedded line numbers jump — TODO restore from
// upstream.
517template <typename ContextT> class DivergencePropagator {
518public:
519 using BlockT = typename ContextT::BlockT;
520 using DominatorTreeT = typename ContextT::DominatorTreeT;
521 using FunctionT = typename ContextT::FunctionT;
522 using ValueRefT = typename ContextT::ValueRefT;
523
525 using CycleT = typename CycleInfoT::CycleT;
526
530 typename SyncDependenceAnalysisT::DivergenceDescriptor;
531 using BlockLabelMapT = typename SyncDependenceAnalysisT::BlockLabelMap;
532
537 const ContextT &Context;
538
539 // Track blocks that receive a new label. Every time we relabel a
540 // cycle header, we make another pass over the modified post-order in
541 // order to propagate the header label. The bit vector also allows
542 // us to skip labels that have not changed.
544
545 // divergent join and cycle exit descriptor.
546 std::unique_ptr<DivergenceDescriptorT> DivDesc;
548

554

// Debug dump of the current label assignment, in reverse modified
// post-order (the signature line, original 555, was lost in extraction).
556 Out << "Propagator::BlockLabels {\n";
557 for (int BlockIdx = (int)CyclePOT.size() - 1; BlockIdx >= 0; --BlockIdx) {
558 const auto *Block = CyclePOT[BlockIdx];
559 const auto *Label = BlockLabels[Block];
560 Out << Context.print(Block) << "(" << BlockIdx << ") : ";
561 if (!Label) {
562 Out << "<null>\n";
563 } else {
564 Out << Context.print(Label) << "\n";
565 }
566 }
567 Out << "}\n";
568 }
569
570 // Push a definition (\p PushedLabel) to \p SuccBlock and return whether this
571 // causes a divergent join.
572 bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel) {
573 const auto *OldLabel = BlockLabels[&SuccBlock];
574
575 LLVM_DEBUG(dbgs() << "labeling " << Context.print(&SuccBlock) << ":\n"
576 << "\tpushed label: " << Context.print(&PushedLabel)
577 << "\n"
578 << "\told label: " << Context.print(OldLabel) << "\n");
579
580 // Early exit if there is no change in the label.
581 if (OldLabel == &PushedLabel)
582 return false;
583
// A block already labeled as itself stays a join; only non-join labels are
// considered "fresh" and scheduled for further propagation.
584 if (OldLabel != &SuccBlock) {
585 auto SuccIdx = CyclePOT.getIndex(&SuccBlock);
586 // Assigning a new label, mark this in FreshLabels.
587 LLVM_DEBUG(dbgs() << "\tfresh label: " << SuccIdx << "\n");
588 FreshLabels.set(SuccIdx);
589 }
590
591 // This is not a join if the succ was previously unlabeled.
592 if (!OldLabel) {
593 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&PushedLabel)
594 << "\n");
595 BlockLabels[&SuccBlock] = &PushedLabel;
596 return false;
597 }
598
599 // This is a new join. Label the join block as itself, and not as
600 // the pushed label.
601 LLVM_DEBUG(dbgs() << "\tnew label: " << Context.print(&SuccBlock) << "\n");
602 BlockLabels[&SuccBlock] = &SuccBlock;
603
604 return true;
605 }
606
607 // visiting a virtual cycle exit edge from the cycle header --> temporal
608 // divergence on join
609 bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label) {
610 if (!computeJoin(ExitBlock, Label))
611 return false;
612
613 // Identified a divergent cycle exit
614 DivDesc->CycleDivBlocks.insert(&ExitBlock);
615 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(&ExitBlock)
616 << "\n");
617 return true;
618 }
619
620 // process \p SuccBlock with reaching definition \p Label
621 bool visitEdge(const BlockT &SuccBlock, const BlockT &Label) {
622 if (!computeJoin(SuccBlock, Label))
623 return false;
624
625 // Divergent, disjoint paths join.
626 DivDesc->JoinDivBlocks.insert(&SuccBlock);
627 LLVM_DEBUG(dbgs() << "\tDivergent join: " << Context.print(&SuccBlock)
628 << "\n");
629 return true;
630 }
631
// Main fixed-point loop: propagate labels through the modified post-order
// until no fresh labels remain, collecting join blocks and divergent cycle
// exits into DivDesc.
632 std::unique_ptr<DivergenceDescriptorT> computeJoinPoints() {
// NOTE(review): a statement at original line 633 (presumably
// `assert(DivDesc);`) was lost in extraction.
634
635 LLVM_DEBUG(dbgs() << "SDA:computeJoinPoints: "
636 << Context.print(&DivTermBlock) << "\n");
637
638 int DivTermIdx = CyclePOT.getIndex(&DivTermBlock);
639 auto const *DivTermCycle = CI.getCycle(&DivTermBlock);
640
641 // Locate the largest ancestor cycle that is not reducible and does not
642 // contain a reducible ancestor. This is done with a lambda that is defined
643 // and invoked in the same statement.
644 const CycleT *IrreducibleAncestor = [](const CycleT *C) -> const CycleT * {
645 if (!C)
646 return nullptr;
647 if (C->isReducible())
648 return nullptr;
649 while (const CycleT *P = C->getParentCycle()) {
650 if (P->isReducible())
651 return C;
652 C = P;
653 }
654 assert(!C->getParentCycle());
655 assert(!C->isReducible());
656 return C;
657 }(DivTermCycle);
658
659 // Bootstrap with branch targets
660 for (const auto *SuccBlock : successors(&DivTermBlock)) {
661 if (DivTermCycle && !DivTermCycle->contains(SuccBlock)) {
662 // If DivTerm exits the cycle immediately, computeJoin() might
663 // not reach SuccBlock with a different label. We need to
664 // check for this exit now.
665 DivDesc->CycleDivBlocks.insert(SuccBlock);
666 LLVM_DEBUG(dbgs() << "\tImmediate divergent cycle exit: "
667 << Context.print(SuccBlock) << "\n");
668 }
669 visitEdge(*SuccBlock, *SuccBlock);
670 }
671
672 // Technically propagation can continue until it reaches the last node.
673 //
674 // For efficiency, propagation can stop if FreshLabels.count()==1. But
675 // For irreducible cycles, let propagation continue until it reaches
676 // out of irreducible cycles (see code for details.)
677 while (true) {
678 auto BlockIdx = FreshLabels.find_last();
679 if (BlockIdx == -1)
680 break;
681
682 const auto *Block = CyclePOT[BlockIdx];
683 // If no irreducible cycle, stop if FreshLabels.count() = 1 and Block
684 // is the IPD. If it is in any irreducible cycle, continue propagation.
685 if (FreshLabels.count() == 1 &&
686 (!IrreducibleAncestor || !IrreducibleAncestor->contains(Block)))
687 break;
688
689 LLVM_DEBUG(dbgs() << "Current labels:\n"; printDefs(dbgs()));
690
691 FreshLabels.reset(BlockIdx);
692 if (BlockIdx == DivTermIdx) {
693 LLVM_DEBUG(dbgs() << "Skipping DivTermBlock\n");
694 continue;
695 }
696
697 LLVM_DEBUG(dbgs() << "visiting " << Context.print(Block) << " at index "
698 << BlockIdx << "\n");
699
700 const auto *Label = BlockLabels[Block];
701 assert(Label);
702
703 // If the current block is the header of a reducible cycle, then the label
704 // should be propagated to the cycle exits. If this cycle contains the
705 // branch, then those exits are divergent exits. This is true for any DFS.
706 //
707 // If some DFS has a reducible cycle C with header H, then for
708 // any other DFS, H is the header of a cycle C' that is a
709 // superset of C.
710 //
711 // - For a divergent branch inside the subgraph C, any join node inside
712 // C is either H, or some node encountered by paths within C, without
713 // passing through H.
714 //
715 // - For a divergent branch outside the subgraph C, H is the only node
716 // in C reachable from multiple paths since it is the only entry to C.
717 LLVM_DEBUG(dbgs() << "Check for reducible cycle: " << Context.print(Block)
718 << '\n');
719 if (CyclePOT.isReducibleCycleHeader(Block)) {
720 const auto *BlockCycle = CI.getCycle(Block);
721 LLVM_DEBUG(dbgs() << BlockCycle->print(Context) << '\n');
722 SmallVector<BlockT *, 4> BlockCycleExits;
723 BlockCycle->getExitBlocks(BlockCycleExits);
724 bool BranchIsInside = BlockCycle->contains(&DivTermBlock);
725 for (auto *BlockCycleExit : BlockCycleExits) {
726 if (BranchIsInside)
727 visitCycleExitEdge(*BlockCycleExit, *Label);
728 else
729 visitEdge(*BlockCycleExit, *Label);
730 }
731 } else {
732 for (const auto *SuccBlock : successors(Block))
733 visitEdge(*SuccBlock, *Label);
734 }
735 }
736
737 LLVM_DEBUG(dbgs() << "Final labeling:\n"; printDefs(dbgs()));
738
739 // Check every cycle containing DivTermBlock for exit divergence.
740 // A cycle has exit divergence if the label of an exit block does
741 // not match the label of its header.
742 for (const auto *Cycle = CI.getCycle(&DivTermBlock); Cycle;
743 Cycle = Cycle->getParentCycle()) {
744 if (Cycle->isReducible()) {
745 // The exit divergence of a reducible cycle is recorded while
746 // propagating labels.
747 continue;
748 }
// NOTE(review): the declaration of `Exits` (original line 749, presumably
// `SmallVector<BlockT *> Exits;`) was lost in extraction.
750 Cycle->getExitBlocks(Exits);
751 auto *Header = Cycle->getHeader();
752 auto *HeaderLabel = BlockLabels[Header];
753 for (const auto *Exit : Exits) {
754 if (BlockLabels[Exit] != HeaderLabel) {
755 // Identified a divergent cycle exit
756 DivDesc->CycleDivBlocks.insert(Exit);
757 LLVM_DEBUG(dbgs() << "\tDivergent cycle exit: " << Context.print(Exit)
758 << "\n");
759 }
760 }
761 }
762
763 return std::move(DivDesc);
764 }
765};
766
767template <typename ContextT>
769 const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
770 : CyclePO(Context), DT(DT), CI(CI) {
771 CyclePO.compute(CI);
772}
773
774template <typename ContextT>
776 const BlockT *DivTermBlock) -> const DivergenceDescriptor & {
777 // trivial case
778 if (succ_size(DivTermBlock) <= 1) {
779 return EmptyDivergenceDesc;
780 }
781
782 // already available in cache?
783 auto ItCached = CachedControlDivDescs.find(DivTermBlock);
784 if (ItCached != CachedControlDivDescs.end())
785 return *ItCached->second;
786
787 // compute all join points
788 DivergencePropagatorT Propagator(CyclePO, DT, CI, *DivTermBlock);
789 auto DivDesc = Propagator.computeJoinPoints();
790
791 auto printBlockSet = [&](ConstBlockSet &Blocks) {
792 return Printable([&](raw_ostream &Out) {
793 Out << "[";
794 ListSeparator LS;
795 for (const auto *BB : Blocks) {
796 Out << LS << CI.getSSAContext().print(BB);
797 }
798 Out << "]\n";
799 });
800 };
801
803 dbgs() << "\nResult (" << CI.getSSAContext().print(DivTermBlock)
804 << "):\n JoinDivBlocks: " << printBlockSet(DivDesc->JoinDivBlocks)
805 << " CycleDivBlocks: " << printBlockSet(DivDesc->CycleDivBlocks)
806 << "\n");
807 (void)printBlockSet;
808
809 auto ItInserted =
810 CachedControlDivDescs.try_emplace(DivTermBlock, std::move(DivDesc));
811 assert(ItInserted.second);
812 return *ItInserted.first->second;
813}
814
815template <typename ContextT>
817 const InstructionT &I) {
818 if (isAlwaysUniform(I))
819 return;
820 // For custom uniformity candidates, check if the instruction can be
821 // proven uniform based on which operands are uniform/divergent.
822 // The candidate will be re-evaluated as operands become divergent.
823 if (CustomUniformityCandidates.contains(&I)) {
824 if (isCustomUniform(I))
825 return;
826 }
827 bool Marked = false;
828 if (I.isTerminator()) {
829 Marked = DivergentTermBlocks.insert(I.getParent()).second;
830 if (Marked) {
831 LLVM_DEBUG(dbgs() << "marked divergent term block: "
832 << Context.print(I.getParent()) << "\n");
833 }
834 } else {
835 Marked = markDefsDivergent(I);
836 }
837
838 if (Marked)
839 Worklist.push_back(&I);
840}
841
842template <typename ContextT>
844 ConstValueRefT Val) {
845 if (UniformValues.erase(Val)) {
846 LLVM_DEBUG(dbgs() << "marked divergent: " << Context.print(Val) << "\n");
847 return true;
848 }
849 return false;
850}
851
852template <typename ContextT>
854 const InstructionT &Instr) {
855 UniformOverrides.insert(&Instr);
856}
857
858template <typename ContextT>
863
864// Mark as divergent all external uses of values defined in \p DefCycle.
865//
866// A value V defined by a block B inside \p DefCycle may be used outside the
867// cycle only if the use is a PHI in some exit block, or B dominates some exit
868// block. Thus, we check uses as follows:
869//
870// - Check all PHIs in all exit blocks for inputs defined inside \p DefCycle.
871// - For every block B inside \p DefCycle that dominates at least one exit
872// block, check all uses outside \p DefCycle.
873//
874// FIXME: This function does not distinguish between divergent and uniform
875// exits. For each divergent exit, only the values that are live at that exit
876// need to be propagated as divergent at their use outside the cycle.
877template <typename ContextT>
878void GenericUniformityAnalysisImpl<ContextT>::analyzeCycleExitDivergence(
879 const CycleT &DefCycle) {
881 DefCycle.getExitBlocks(Exits);
882 for (auto *Exit : Exits) {
883 for (auto &Phi : Exit->phis()) {
884 if (usesValueFromCycle(Phi, DefCycle)) {
885 markDivergent(Phi);
886 }
887 }
888 }
889
890 for (auto *BB : DefCycle.blocks()) {
891 if (!llvm::any_of(Exits,
892 [&](BlockT *Exit) { return DT.dominates(BB, Exit); }))
893 continue;
894 for (auto &II : *BB) {
895 propagateTemporalDivergence(II, DefCycle);
896 }
897 }
898}
899
900template <typename ContextT>
901void GenericUniformityAnalysisImpl<ContextT>::propagateCycleExitDivergence(
902 const BlockT &DivExit, const CycleT &InnerDivCycle) {
903 LLVM_DEBUG(dbgs() << "\tpropCycleExitDiv " << Context.print(&DivExit)
904 << "\n");
905 auto *DivCycle = &InnerDivCycle;
906 auto *OuterDivCycle = DivCycle;
907 auto *ExitLevelCycle = CI.getCycle(&DivExit);
908 const unsigned CycleExitDepth =
909 ExitLevelCycle ? ExitLevelCycle->getDepth() : 0;
910
911 // Find outer-most cycle that does not contain \p DivExit
912 while (DivCycle && DivCycle->getDepth() > CycleExitDepth) {
913 LLVM_DEBUG(dbgs() << " Found exiting cycle: "
914 << Context.print(DivCycle->getHeader()) << "\n");
915 OuterDivCycle = DivCycle;
916 DivCycle = DivCycle->getParentCycle();
917 }
918 LLVM_DEBUG(dbgs() << "\tOuter-most exiting cycle: "
919 << Context.print(OuterDivCycle->getHeader()) << "\n");
920
921 if (!DivergentExitCycles.insert(OuterDivCycle).second)
922 return;
923
924 // Exit divergence does not matter if the cycle itself is assumed to
925 // be divergent.
926 for (const auto *C : AssumedDivergent) {
927 if (C->contains(OuterDivCycle))
928 return;
929 }
930
931 analyzeCycleExitDivergence(*OuterDivCycle);
932}
933
934template <typename ContextT>
935void GenericUniformityAnalysisImpl<ContextT>::taintAndPushAllDefs(
936 const BlockT &BB) {
937 LLVM_DEBUG(dbgs() << "taintAndPushAllDefs " << Context.print(&BB) << "\n");
938 for (const auto &I : instrs(BB)) {
939 // Terminators do not produce values; they are divergent only if
940 // the condition is divergent. That is handled when the divergent
941 // condition is placed in the worklist.
942 if (I.isTerminator())
943 break;
944
945 markDivergent(I);
946 }
947}
948
949/// Mark divergent phi nodes in a join block
950template <typename ContextT>
951void GenericUniformityAnalysisImpl<ContextT>::taintAndPushPhiNodes(
952 const BlockT &JoinBlock) {
953 LLVM_DEBUG(dbgs() << "taintAndPushPhiNodes in " << Context.print(&JoinBlock)
954 << "\n");
955 for (const auto &Phi : JoinBlock.phis()) {
956 // FIXME: The non-undef value is not constant per se; it just happens to be
957 // uniform and may not dominate this PHI. So assuming that the same value
958 // reaches along all incoming edges may itself be undefined behaviour. This
959 // particular interpretation of the undef value was added to
960 // DivergenceAnalysis in the following review:
961 //
962 // https://reviews.llvm.org/D19013
963 if (ContextT::isConstantOrUndefValuePhi(Phi))
964 continue;
965 markDivergent(Phi);
966 }
967}
968
969/// Add \p Candidate to \p Cycles if it is not already contained in \p Cycles.
970///
971/// \return true iff \p Candidate was added to \p Cycles.
972template <typename CycleT>
974 CycleT *Candidate) {
975 if (llvm::any_of(Cycles,
976 [Candidate](CycleT *C) { return C->contains(Candidate); }))
977 return false;
978 Cycles.push_back(Candidate);
979 return true;
980}
981
/// Return the outermost cycle made divergent by branch outside it.
///
/// If two paths that diverged outside an irreducible cycle join
/// inside that cycle, then that whole cycle is assumed to be
/// divergent. This does not apply if the cycle is reducible.
template <typename CycleT, typename BlockT>
static const CycleT *getExtDivCycle(const CycleT *Cycle,
                                    const BlockT *DivTermBlock,
                                    const BlockT *JoinBlock) {
  assert(Cycle);
  assert(Cycle->contains(JoinBlock));

  // The divergent branch is inside the cycle; this criterion is about
  // branches outside it.
  if (Cycle->contains(DivTermBlock))
    return nullptr;

  // Ascend to the outermost ancestor cycle that still excludes the
  // divergent branch block.
  const auto *OriginalCycle = Cycle;
  const auto *Parent = Cycle->getParentCycle();
  while (Parent && !Parent->contains(DivTermBlock)) {
    Cycle = Parent;
    Parent = Cycle->getParentCycle();
  }

  // If the original cycle is not the outermost cycle, then the outermost cycle
  // is irreducible. If the outermost cycle were reducible, then external
  // diverged paths would not reach the original inner cycle.
  (void)OriginalCycle;
  assert(Cycle == OriginalCycle || !Cycle->isReducible());

  // A reducible cycle is only entered at its header, so externally diverged
  // paths must already have joined there.
  if (Cycle->isReducible()) {
    assert(Cycle->getHeader() == JoinBlock);
    return nullptr;
  }

  LLVM_DEBUG(dbgs() << "cycle made divergent by external branch\n");
  return Cycle;
}
1018
/// Return the outermost cycle made divergent by branch inside it.
///
/// This checks the "diverged entry" criterion defined in the
/// docs/ConvergenceAnalysis.html.
template <typename ContextT, typename CycleT, typename BlockT,
          typename DominatorTreeT>
static const CycleT *
getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
               const BlockT *JoinBlock, const DominatorTreeT &DT,
               ContextT &Context) {
  LLVM_DEBUG(dbgs() << "examine join " << Context.print(JoinBlock)
                    << " for internal branch " << Context.print(DivTermBlock)
                    << "\n");
  // If the branch dominates the join, the criterion cannot fire.
  if (DT.properlyDominates(DivTermBlock, JoinBlock))
    return nullptr;

  // Find the smallest common cycle, if one exists.
  assert(Cycle && Cycle->contains(JoinBlock));
  while (Cycle && !Cycle->contains(DivTermBlock)) {
    Cycle = Cycle->getParentCycle();
  }
  // The criterion applies only to irreducible cycles.
  if (!Cycle || Cycle->isReducible())
    return nullptr;

  // If the common cycle's header dominates the join, its entry is not
  // diverged.
  if (DT.properlyDominates(Cycle->getHeader(), JoinBlock))
    return nullptr;

  LLVM_DEBUG(dbgs() << " header " << Context.print(Cycle->getHeader())
                    << " does not dominate join\n");

  // Expand to the outermost ancestor cycle whose header also fails to
  // dominate the join block.
  const auto *Parent = Cycle->getParentCycle();
  while (Parent && !DT.properlyDominates(Parent->getHeader(), JoinBlock)) {
    LLVM_DEBUG(dbgs() << " header " << Context.print(Parent->getHeader())
                      << " does not dominate join\n");
    Cycle = Parent;
    Parent = Parent->getParentCycle();
  }

  LLVM_DEBUG(dbgs() << " cycle made divergent by internal branch\n");
  return Cycle;
}
1060
/// Return the outermost cycle made divergent by \p DivTermBlock's branch
/// reaching \p JoinBlock, combining the external and internal criteria.
template <typename ContextT, typename CycleT, typename BlockT,
          typename DominatorTreeT>
static const CycleT *
getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock,
                           const BlockT *JoinBlock, const DominatorTreeT &DT,
                           ContextT &Context) {
  if (!Cycle)
    return nullptr;

  // Largest cycle that contains JoinBlock but not DivTermBlock.
  const auto *ExtCycle = getExtDivCycle(Cycle, DivTermBlock, JoinBlock);

  // Largest cycle that contains both and has a diverged entry.
  const auto *IntCycle =
      getIntDivCycle(Cycle, DivTermBlock, JoinBlock, DT, Context);

  // The internal criterion takes precedence when it applies.
  return IntCycle ? IntCycle : ExtCycle;
}
1081
1082template <typename ContextT>
1083bool GenericUniformityAnalysisImpl<ContextT>::isTemporalDivergent(
1084 const BlockT &ObservingBlock, const InstructionT &Def) const {
1085 const BlockT *DefBlock = Def.getParent();
1086 for (const CycleT *Cycle = CI.getCycle(DefBlock);
1087 Cycle && !Cycle->contains(&ObservingBlock);
1088 Cycle = Cycle->getParentCycle()) {
1089 if (DivergentExitCycles.contains(Cycle)) {
1090 return true;
1091 }
1092 }
1093 return false;
1094}
1095
1096template <typename ContextT>
1098 const InstructionT &Term) {
1099 const auto *DivTermBlock = Term.getParent();
1100 DivergentTermBlocks.insert(DivTermBlock);
1101 LLVM_DEBUG(dbgs() << "analyzeControlDiv " << Context.print(DivTermBlock)
1102 << "\n");
1103
1104 // Don't propagate divergence from unreachable blocks.
1105 if (!DT.isReachableFromEntry(DivTermBlock))
1106 return;
1107
1108 const auto &DivDesc = SDA.getJoinBlocks(DivTermBlock);
1110
1111 // Iterate over all blocks now reachable by a disjoint path join
1112 for (const auto *JoinBlock : DivDesc.JoinDivBlocks) {
1113 const auto *Cycle = CI.getCycle(JoinBlock);
1114 LLVM_DEBUG(dbgs() << "visiting join block " << Context.print(JoinBlock)
1115 << "\n");
1116 if (const auto *Outermost = getOutermostDivergentCycle(
1117 Cycle, DivTermBlock, JoinBlock, DT, Context)) {
1118 LLVM_DEBUG(dbgs() << "found divergent cycle\n");
1119 DivCycles.push_back(Outermost);
1120 continue;
1121 }
1122 taintAndPushPhiNodes(*JoinBlock);
1123 }
1124
1125 // Sort by order of decreasing depth. This allows later cycles to be skipped
1126 // because they are already contained in earlier ones.
1127 llvm::sort(DivCycles, [](const CycleT *A, const CycleT *B) {
1128 return A->getDepth() > B->getDepth();
1129 });
1130
1131 // Cycles that are assumed divergent due to the diverged entry
1132 // criterion potentially contain temporal divergence depending on
1133 // the DFS chosen. Conservatively, all values produced in such a
1134 // cycle are assumed divergent. "Cycle invariant" values may be
1135 // assumed uniform, but that requires further analysis.
1136 for (auto *C : DivCycles) {
1137 if (!insertIfNotContained(AssumedDivergent, C))
1138 continue;
1139 LLVM_DEBUG(dbgs() << "process divergent cycle\n");
1140 for (const BlockT *BB : C->blocks()) {
1141 taintAndPushAllDefs(*BB);
1142 }
1143 }
1144
1145 const auto *BranchCycle = CI.getCycle(DivTermBlock);
1146 assert(DivDesc.CycleDivBlocks.empty() || BranchCycle);
1147 for (const auto *DivExitBlock : DivDesc.CycleDivBlocks) {
1148 propagateCycleExitDivergence(*DivExitBlock, *BranchCycle);
1149 }
1150}
1151
1152template <typename ContextT>
1154 HasBranchDivergence = true;
1155
1156 // All values on the Worklist are divergent.
1157 // Their users may not have been updated yet.
1158 while (!Worklist.empty()) {
1159 const InstructionT *I = Worklist.back();
1160 Worklist.pop_back();
1161
1162 LLVM_DEBUG(dbgs() << "worklist pop: " << Context.print(I) << "\n");
1163
1164 if (I->isTerminator()) {
1166 continue;
1167 }
1168
1169 // propagate value divergence to users
1170 assert(isDivergent(*I) && "Worklist invariant violated!");
1171 pushUsers(*I);
1172 }
1173}
1174
1175template <typename ContextT>
1181
1182template <typename ContextT>
1184 const InstructionT &Instr) const {
1185 return UniformOverrides.contains(&Instr);
1186}
1187
1188template <typename ContextT>
1193
1194template <typename ContextT>
1196 const DominatorTreeT &DT, const CycleInfoT &CI,
1197 const TargetTransformInfo *TTI) {
1198 DA.reset(new ImplT{DT, CI, TTI});
1199}
1200
1201template <typename ContextT>
1203 // When we print Value, LLVM IR instruction, we want to print extra new line.
1204 // In LLVM IR print function for Value does not print new line at the end.
1205 // In MIR print for MachineInstr prints new line at the end.
1206 constexpr bool IsMIR = std::is_same<InstructionT, MachineInstr>::value;
1207 std::string NewLine = IsMIR ? "" : "\n";
1208
1209 bool FoundDivergence = false;
1210
1211 FoundDivergence |= printDivergentArgs(OS);
1212
1213 if (!AssumedDivergent.empty()) {
1214 FoundDivergence = true;
1215 OS << "CYCLES ASSUMED DIVERGENT:\n";
1216 for (const CycleT *cycle : AssumedDivergent) {
1217 OS << " " << cycle->print(Context) << '\n';
1218 }
1219 }
1220
1221 if (!DivergentExitCycles.empty()) {
1222 FoundDivergence = true;
1223 OS << "CYCLES WITH DIVERGENT EXIT:\n";
1224 for (const CycleT *cycle : DivergentExitCycles) {
1225 OS << " " << cycle->print(Context) << '\n';
1226 }
1227 }
1228
1229 if (!TemporalDivergenceList.empty()) {
1230 FoundDivergence = true;
1231 OS << "\nTEMPORAL DIVERGENCE LIST:\n";
1232
1233 for (auto [Val, UseInst, Cycle] : TemporalDivergenceList) {
1234 OS << "Value :" << Context.print(Val) << NewLine
1235 << "Used by :" << Context.print(UseInst) << NewLine
1236 << "Outside cycle :" << Cycle->print(Context) << "\n\n";
1237 }
1238 }
1239
1240 for (auto &block : F) {
1241 OS << "\nBLOCK " << Context.print(&block) << '\n';
1242
1243 OS << "DEFINITIONS\n";
1245 Context.appendBlockDefs(defs, block);
1246 for (auto value : defs) {
1247 if (isDivergent(value)) {
1248 FoundDivergence = true;
1249 OS << " DIVERGENT: ";
1250 } else {
1251 OS << " ";
1252 }
1253 OS << Context.print(value) << NewLine;
1254 }
1255
1256 OS << "TERMINATORS\n";
1258 Context.appendBlockTerms(terms, block);
1259 bool divergentTerminators = hasDivergentTerminator(block);
1260 if (divergentTerminators)
1261 FoundDivergence = true;
1262 for (auto *T : terms) {
1263 if (divergentTerminators)
1264 OS << " DIVERGENT: ";
1265 else
1266 OS << " ";
1267 OS << Context.print(T) << NewLine;
1268 }
1269
1270 OS << "END BLOCK\n";
1271 }
1272
1273 if (!FoundDivergence)
1274 OS << "ALL VALUES UNIFORM\n";
1275}
1276
1277template <typename ContextT>
1281 return make_range(DA->TemporalDivergenceList.begin(),
1282 DA->TemporalDivergenceList.end());
1283}
1284
1285template <typename ContextT>
1286const typename ContextT::FunctionT &
1288 return DA->getFunction();
1289}
1290
1291/// Whether \p V is divergent at its definition.
1292template <typename ContextT>
1294 return DA->isDivergent(V);
1295}
1296
1297template <typename ContextT>
1299 return DA->isDivergent(*I);
1300}
1301
1302template <typename ContextT>
1304 return DA->isDivergentUse(U);
1305}
1306
1307template <typename ContextT>
1309 return DA->hasDivergentTerminator(B);
1310}
1311
1312/// \brief T helper function for printing.
1313template <typename ContextT>
1315 DA->print(out);
1316}
1317
/// Iteratively compute a post order over \p Stack restricted to \p Cycle
/// (or the whole function when \p Cycle is null). Nested cycles are
/// handled by recursing into computeCyclePO once all their exits are
/// finalized, which yields the "modified" post order.
template <typename ContextT>
void llvm::ModifiedPostOrder<ContextT>::computeStackPO(
    SmallVectorImpl<const BlockT *> &Stack, const CycleInfoT &CI,
    const CycleT *Cycle, SmallPtrSetImpl<const BlockT *> &Finalized) {
  LLVM_DEBUG(dbgs() << "inside computeStackPO\n");
  while (!Stack.empty()) {
    auto *NextBB = Stack.back();
    // Already emitted on an earlier pass over this node.
    if (Finalized.count(NextBB)) {
      Stack.pop_back();
      continue;
    }
    LLVM_DEBUG(dbgs() << " visiting " << CI.getSSAContext().print(NextBB)
                      << "\n");
    auto *NestedCycle = CI.getCycle(NextBB);
    // NextBB belongs to a cycle strictly nested inside the current region.
    if (Cycle != NestedCycle && (!Cycle || Cycle->contains(NestedCycle))) {
      LLVM_DEBUG(dbgs() << " found a cycle\n");
      // Ascend to the child of the current region that contains NextBB.
      while (NestedCycle->getParentCycle() != Cycle)
        NestedCycle = NestedCycle->getParentCycle();

      SmallVector<BlockT *, 3> NestedExits;
      NestedCycle->getExitBlocks(NestedExits);
      bool PushedNodes = false;
      // The cycle is emitted only after all its in-region exits are done.
      for (auto *NestedExitBB : NestedExits) {
        LLVM_DEBUG(dbgs() << " examine exit: "
                          << CI.getSSAContext().print(NestedExitBB) << "\n");
        if (Cycle && !Cycle->contains(NestedExitBB))
          continue;
        if (Finalized.count(NestedExitBB))
          continue;
        PushedNodes = true;
        Stack.push_back(NestedExitBB);
        LLVM_DEBUG(dbgs() << " pushed exit: "
                          << CI.getSSAContext().print(NestedExitBB) << "\n");
      }
      if (!PushedNodes) {
        // All loop exits finalized -> finish this node
        Stack.pop_back();
        computeCyclePO(CI, NestedCycle, Finalized);
      }
      continue;
    }

    LLVM_DEBUG(dbgs() << " no nested cycle, going into DAG\n");
    // DAG-style
    bool PushedNodes = false;
    for (auto *SuccBB : successors(NextBB)) {
      LLVM_DEBUG(dbgs() << " examine succ: "
                        << CI.getSSAContext().print(SuccBB) << "\n");
      if (Cycle && !Cycle->contains(SuccBB))
        continue;
      if (Finalized.count(SuccBB))
        continue;
      PushedNodes = true;
      Stack.push_back(SuccBB);
      LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(SuccBB)
                        << "\n");
    }
    if (!PushedNodes) {
      // Never push nodes twice
      LLVM_DEBUG(dbgs() << " finishing node: "
                        << CI.getSSAContext().print(NextBB) << "\n");
      Stack.pop_back();
      Finalized.insert(NextBB);
      appendBlock(*NextBB);
    }
  }
  LLVM_DEBUG(dbgs() << "exited computeStackPO\n");
}
1386
1387template <typename ContextT>
1388void ModifiedPostOrder<ContextT>::computeCyclePO(
1389 const CycleInfoT &CI, const CycleT *Cycle,
1391 LLVM_DEBUG(dbgs() << "inside computeCyclePO\n");
1393 auto *CycleHeader = Cycle->getHeader();
1394
1395 LLVM_DEBUG(dbgs() << " noted header: "
1396 << CI.getSSAContext().print(CycleHeader) << "\n");
1397 assert(!Finalized.count(CycleHeader));
1398 Finalized.insert(CycleHeader);
1399
1400 // Visit the header last
1401 LLVM_DEBUG(dbgs() << " finishing header: "
1402 << CI.getSSAContext().print(CycleHeader) << "\n");
1403 appendBlock(*CycleHeader, Cycle->isReducible());
1404
1405 // Initialize with immediate successors
1406 for (auto *BB : successors(CycleHeader)) {
1407 LLVM_DEBUG(dbgs() << " examine succ: " << CI.getSSAContext().print(BB)
1408 << "\n");
1409 if (!Cycle->contains(BB))
1410 continue;
1411 if (BB == CycleHeader)
1412 continue;
1413 if (!Finalized.count(BB)) {
1414 LLVM_DEBUG(dbgs() << " pushed succ: " << CI.getSSAContext().print(BB)
1415 << "\n");
1416 Stack.push_back(BB);
1417 }
1418 }
1419
1420 // Compute PO inside region
1421 computeStackPO(Stack, CI, Cycle, Finalized);
1422
1423 LLVM_DEBUG(dbgs() << "exited computeCyclePO\n");
1424}
1425
1426/// \brief Generically compute the modified post order.
1427template <typename ContextT>
1431 auto *F = CI.getFunction();
1432 Stack.reserve(24); // FIXME made-up number
1433 Stack.push_back(&F->front());
1434 computeStackPO(Stack, CI, nullptr, Finalized);
1435}
1436
1437} // namespace llvm
1438
1439#undef DEBUG_TYPE
1440
1441#endif // LLVM_ADT_GENERICUNIFORMITYIMPL_H
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file defines the DenseSet and SmallDenseSet classes.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define T
uint64_t IntrinsicInst * II
#define P(N)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SparseBitVector class.
This file contains some functions that are useful when dealing with strings.
#define LLVM_DEBUG(...)
Definition Debug.h:114
unify loop Fixup each natural loop to have a single exit block
Implements a dense probed hash-table based set.
Definition DenseSet.h:279
Compute divergence starting with a divergent branch.
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
typename ContextT::DominatorTreeT DominatorTreeT
bool computeJoin(const BlockT &SuccBlock, const BlockT &PushedLabel)
std::unique_ptr< DivergenceDescriptorT > DivDesc
void printDefs(raw_ostream &Out)
typename ContextT::FunctionT FunctionT
GenericCycleInfo< ContextT > CycleInfoT
ModifiedPostOrder< ContextT > ModifiedPO
std::unique_ptr< DivergenceDescriptorT > computeJoinPoints()
bool visitCycleExitEdge(const BlockT &ExitBlock, const BlockT &Label)
typename ContextT::ValueRefT ValueRefT
typename ContextT::BlockT BlockT
typename CycleInfoT::CycleT CycleT
DivergencePropagator(const ModifiedPO &CyclePOT, const DominatorTreeT &DT, const CycleInfoT &CI, const BlockT &DivTermBlock)
bool visitEdge(const BlockT &SuccBlock, const BlockT &Label)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
Cycle information for a function.
bool contains(const BlockT *Block) const
Return whether Block is contained in the cycle.
const GenericCycle * getParentCycle() const
Locate join blocks for disjoint paths starting at a divergent branch.
GenericSyncDependenceAnalysis(const ContextT &Context, const DominatorTreeT &DT, const CycleInfoT &CI)
ModifiedPostOrder< ContextT > ModifiedPO
DivergencePropagator< ContextT > DivergencePropagatorT
DenseMap< const BlockT *, const BlockT * > BlockLabelMap
SmallPtrSet< const BlockT *, 4 > ConstBlockSet
typename ContextT::DominatorTreeT DominatorTreeT
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::FunctionT FunctionT
typename ContextT::InstructionT InstructionT
typename ContextT::ValueRefT ValueRefT
const DivergenceDescriptor & getJoinBlocks(const BlockT *DivTermBlock)
Computes divergent join points and cycle exits caused by branch divergence in Term.
SmallVector< TemporalDivergenceTuple, 8 > TemporalDivergenceList
bool isAlwaysUniform(const InstructionT &Instr) const
Whether Val will always return a uniform value regardless of its operands.
bool isCustomUniform(const InstructionT &I) const
Check if an instruction with Custom uniformity can be proven uniform based on its operands.
typename ContextT::ValueRefT ValueRefT
void analyzeControlDivergence(const InstructionT &Term)
Mark Term as divergent and push all Instructions that become divergent as a result on the worklist.
void addCustomUniformityCandidate(const InstructionT *I)
Add an instruction that requires custom uniformity analysis.
bool isDivergent(ConstValueRefT V) const
Whether Val is divergent at its definition.
bool isDivergentUse(const UseT &U) const
bool hasDivergentDefs(const InstructionT &I) const
SmallPtrSet< const InstructionT *, 8 > CustomUniformityCandidates
typename ContextT::InstructionT InstructionT
typename ContextT::ConstValueRefT ConstValueRefT
typename SyncDependenceAnalysisT::BlockLabelMap BlockLabelMapT
bool printDivergentArgs(raw_ostream &out) const
Print divergent arguments and return true if any were found.
typename ContextT::DominatorTreeT DominatorTreeT
void compute()
Propagate divergence to all instructions in the region.
bool hasDivergentTerminator(const BlockT &B) const
GenericUniformityAnalysisImpl(const DominatorTreeT &DT, const CycleInfoT &CI, const TargetTransformInfo *TTI)
bool markDefsDivergent(const InstructionT &Instr)
Mark outputs of Instr as divergent.
typename ContextT::FunctionT FunctionT
bool isDivergent(const InstructionT &I) const
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
std::vector< const InstructionT * > Worklist
void markDivergent(const InstructionT &I)
Examine I for divergent outputs and add to the worklist.
SmallPtrSet< const BlockT *, 32 > DivergentTermBlocks
GenericCycleInfo< ContextT > CycleInfoT
void addUniformOverride(const InstructionT &Instr)
Mark UniVal as a value that is always uniform.
void recordTemporalDivergence(ConstValueRefT, const InstructionT *, const CycleT *)
typename SyncDependenceAnalysisT::DivergenceDescriptor DivergenceDescriptorT
GenericSyncDependenceAnalysis< ContextT > SyncDependenceAnalysisT
bool hasDivergentTerminator(const BlockT &B)
bool isDivergentUse(const UseT &U) const
Whether U is divergent.
bool isDivergent(ConstValueRefT V) const
Whether V is divergent at its definition.
void print(raw_ostream &Out) const
T helper function for printing.
std::tuple< ConstValueRefT, InstructionT *, const CycleT * > TemporalDivergenceTuple
typename ContextT::ConstValueRefT ConstValueRefT
typename ContextT::BlockT BlockT
typename ContextT::InstructionT InstructionT
const FunctionT & getFunction() const
The GPU kernel this analysis result is for.
GenericCycleInfo< ContextT > CycleInfoT
typename ContextT::DominatorTreeT DominatorTreeT
iterator_range< TemporalDivergenceTuple * > getTemporalDivergenceList() const
A helper class to return the specified delimiter string after the first invocation of operator String...
Construct a specially modified post-order traversal of cycles.
typename ContextT::FunctionT FunctionT
const BlockT * operator[](size_t idx) const
typename CycleInfoT::CycleT CycleT
bool isReducibleCycleHeader(const BlockT *BB) const
ModifiedPostOrder(const ContextT &C)
unsigned count(BlockT *BB) const
void compute(const CycleInfoT &CI)
Generically compute the modified post order.
GenericCycleInfo< ContextT > CycleInfoT
void appendBlock(const BlockT &BB, bool isReducibleCycleHeader=false)
unsigned getIndex(const BlockT *BB) const
typename std::vector< BlockT * >::const_iterator const_iterator
typename ContextT::DominatorTreeT DominatorTreeT
typename ContextT::BlockT BlockT
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
static const CycleT * getIntDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Return the outermost cycle made divergent by branch inside it.
static const CycleT * getExtDivCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock)
Return the outermost cycle made divergent by branch outside it.
auto successors(const MachineBasicBlock *BB)
static bool insertIfNotContained(SmallVector< CycleT * > &Cycles, CycleT *Candidate)
Add Candidate to Cycles if it is not already contained in Cycles.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
CycleInfo::CycleT Cycle
Definition CycleInfo.h:26
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto succ_size(const MachineBasicBlock *BB)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
TargetTransformInfo TTI
auto instrs(const MachineBasicBlock &BB)
static const CycleT * getOutermostDivergentCycle(const CycleT *Cycle, const BlockT *DivTermBlock, const BlockT *JoinBlock, const DominatorTreeT &DT, ContextT &Context)
Information discovered by the sync dependence analysis for each divergent branch.