LLVM 22.0.0git
StructurizeCFG.cpp
Go to the documentation of this file.
1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/MapVector.h"
14#include "llvm/ADT/STLExtras.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/Metadata.h"
32#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/IR/Value.h"
38#include "llvm/IR/ValueHandle.h"
40#include "llvm/Pass.h"
43#include "llvm/Support/Debug.h"
51#include <cassert>
52#include <utility>
53
54using namespace llvm;
55using namespace llvm::PatternMatch;
56
57#define DEBUG_TYPE "structurizecfg"
58
59// The name for newly created blocks.
60const char FlowBlockName[] = "Flow";
61
62namespace {
63
64static cl::opt<bool> ForceSkipUniformRegions(
65 "structurizecfg-skip-uniform-regions",
67 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
68 cl::init(false));
69
70static cl::opt<bool>
71 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
72 cl::desc("Allow relaxed uniform region checks"),
73 cl::init(true));
74
75// Definition of the complex types used in this pass.
76
77using BBValuePair = std::pair<BasicBlock *, Value *>;
78
79using RNVector = SmallVector<RegionNode *, 8>;
80using BBVector = SmallVector<BasicBlock *, 8>;
81using BranchVector = SmallVector<BranchInst *, 8>;
82using BBValueVector = SmallVector<BBValuePair, 2>;
83
85
87using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
88
89using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
90
91using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
92
93class CondBranchWeights {
94 uint32_t TrueWeight;
95 uint32_t FalseWeight;
96
97 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
98
99public:
100 static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
101 assert(Br.isConditional());
102
103 uint64_t T, F;
104 if (!extractBranchWeights(Br, T, F))
105 return std::nullopt;
106
107 return CondBranchWeights(T, F);
108 }
109
110 static void setMetadata(BranchInst &Br,
111 const MaybeCondBranchWeights &Weights) {
112 assert(Br.isConditional());
113 if (!Weights)
114 return;
115 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
116 setBranchWeights(Br, Arr, false);
117 }
118
119 CondBranchWeights invert() const {
120 return CondBranchWeights{FalseWeight, TrueWeight};
121 }
122};
123
124struct PredInfo {
125 Value *Pred;
126 MaybeCondBranchWeights Weights;
127};
128
132using Val2BBMap = DenseMap<Value *, BasicBlock *>;
133
134// A traits type that is intended to be used in graph algorithms. The graph
135// traits starts at an entry node, and traverses the RegionNodes that are in
136// the Nodes set.
137struct SubGraphTraits {
138 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
139 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
140
141 // This wraps a set of Nodes into the iterator, so we know which edges to
142 // filter out.
143 class WrappedSuccIterator
144 : public iterator_adaptor_base<
145 WrappedSuccIterator, BaseSuccIterator,
146 std::iterator_traits<BaseSuccIterator>::iterator_category, NodeRef,
147 std::ptrdiff_t, NodeRef *, NodeRef> {
149
150 public:
151 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
152 : iterator_adaptor_base(It), Nodes(Nodes) {}
153
154 NodeRef operator*() const { return {*I, Nodes}; }
155 };
156
157 static bool filterAll(const NodeRef &N) { return true; }
158 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
159
160 using ChildIteratorType =
161 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
162
163 static NodeRef getEntryNode(Region *R) {
164 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
165 }
166
167 static NodeRef getEntryNode(NodeRef N) { return N; }
168
169 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
170 auto *filter = N.second ? &filterSet : &filterAll;
171 return make_filter_range(
174 {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
175 filter);
176 }
177
178 static ChildIteratorType child_begin(const NodeRef &N) {
179 return children(N).begin();
180 }
181
182 static ChildIteratorType child_end(const NodeRef &N) {
183 return children(N).end();
184 }
185};
186
187/// Finds the nearest common dominator of a set of BasicBlocks.
188///
189/// For every BB you add to the set, you can specify whether we "remember" the
190/// block. When you get the common dominator, you can also ask whether it's one
191/// of the blocks we remembered.
192class NearestCommonDominator {
193 DominatorTree *DT;
194 BasicBlock *Result = nullptr;
195 bool ResultIsRemembered = false;
196
197 /// Add BB to the resulting dominator.
198 void addBlock(BasicBlock *BB, bool Remember) {
199 if (!Result) {
200 Result = BB;
201 ResultIsRemembered = Remember;
202 return;
203 }
204
205 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
206 if (NewResult != Result)
207 ResultIsRemembered = false;
208 if (NewResult == BB)
209 ResultIsRemembered |= Remember;
210 Result = NewResult;
211 }
212
213public:
214 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
215
216 void addBlock(BasicBlock *BB) {
217 addBlock(BB, /* Remember = */ false);
218 }
219
220 void addAndRememberBlock(BasicBlock *BB) {
221 addBlock(BB, /* Remember = */ true);
222 }
223
224 /// Get the nearest common dominator of all the BBs added via addBlock() and
225 /// addAndRememberBlock().
226 BasicBlock *result() { return Result; }
227
228 /// Is the BB returned by getResult() one of the blocks we added to the set
229 /// with addAndRememberBlock()?
230 bool resultIsRememberedBlock() { return ResultIsRemembered; }
231};
232
233/// Transforms the control flow graph on one single entry/exit region
234/// at a time.
235///
236/// After the transform all "If"/"Then"/"Else" style control flow looks like
237/// this:
238///
239/// \verbatim
240/// 1
241/// ||
242/// | |
243/// 2 |
244/// | /
245/// |/
246/// 3
247/// || Where:
248/// | | 1 = "If" block, calculates the condition
249/// 4 | 2 = "Then" subregion, runs if the condition is true
250/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
251/// |/ 4 = "Else" optional subregion, runs if the condition is false
252/// 5 5 = "End" block, also rejoins the control flow
253/// \endverbatim
254///
255/// Control flow is expressed as a branch where the true exit goes into the
256/// "Then"/"Else" region, while the false exit skips the region
257/// The condition for the optional "Else" region is expressed as a PHI node.
258/// The incoming values of the PHI node are true for the "If" edge and false
259/// for the "Then" edge.
260///
261/// Additionally to that even complicated loops look like this:
262///
263/// \verbatim
264/// 1
265/// ||
266/// | |
267/// 2 ^ Where:
268/// | / 1 = "Entry" block
269/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
270/// 3 3 = "Flow" block, with back edge to entry block
271/// |
272/// \endverbatim
273///
274/// The back edge of the "Flow" block is always on the false side of the branch
275/// while the true side continues the general flow. So the loop condition
276/// consist of a network of PHI nodes where the true incoming values expresses
277/// breaks and the false values expresses continue states.
278
279class StructurizeCFG {
280 Type *Boolean;
281 ConstantInt *BoolTrue;
282 ConstantInt *BoolFalse;
283 Value *BoolPoison;
285 Function *Func;
286 Region *ParentRegion;
287
288 UniformityInfo *UA = nullptr;
289 DominatorTree *DT;
290
292 BBSet Visited;
293 BBSet FlowSet;
294
295 SmallVector<WeakVH, 8> AffectedPhis;
296 BBPhiMap DeletedPhis;
297 BB2BBVecMap AddedPhis;
298
299 PredMap Predicates;
300 BranchVector Conditions;
301
302 BB2BBMap Loops;
303 PredMap LoopPreds;
304 BranchVector LoopConds;
305
306 Val2BBMap HoistedValues;
307
308 RegionNode *PrevNode;
309
310 void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
311
312 bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
313 BasicBlock *HoistTo);
314
315 void orderNodes();
316
317 void analyzeLoops(RegionNode *N);
318
319 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
320
321 void gatherPredicates(RegionNode *N);
322
323 void collectInfos();
324
325 void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
326
327 void simplifyConditions();
328
329 void delPhiValues(BasicBlock *From, BasicBlock *To);
330
331 void addPhiValues(BasicBlock *From, BasicBlock *To);
332
333 void findUndefBlocks(BasicBlock *PHIBlock,
334 const SmallPtrSet<BasicBlock *, 8> &Incomings,
335 SmallVector<BasicBlock *> &UndefBlks) const;
336
337 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
338 PHINode *B);
339
340 void setPhiValues();
341
342 void simplifyAffectedPhis();
343
344 void simplifyHoistedPhis();
345
346 DebugLoc killTerminator(BasicBlock *BB);
347
348 void changeExit(RegionNode *Node, BasicBlock *NewExit,
349 bool IncludeDominator);
350
351 BasicBlock *getNextFlow(BasicBlock *Dominator);
352
353 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty);
354
355 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
356
357 void setPrevNode(BasicBlock *BB);
358
359 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
360
361 bool isPredictableTrue(RegionNode *Node);
362
363 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
364
365 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
366
367 void createFlow();
368
369 void rebuildSSA();
370
371public:
372 void init(Region *R);
373 bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
374 bool makeUniformRegion(Region *R, UniformityInfo &UA);
375};
376
377class StructurizeCFGLegacyPass : public RegionPass {
378 bool SkipUniformRegions;
379
380public:
381 static char ID;
382
383 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
384 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
385 if (ForceSkipUniformRegions.getNumOccurrences())
386 SkipUniformRegions = ForceSkipUniformRegions.getValue();
388 }
389
390 bool runOnRegion(Region *R, RGPassManager &RGM) override {
391 StructurizeCFG SCFG;
392 SCFG.init(R);
393 if (SkipUniformRegions) {
394 UniformityInfo &UA =
395 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
396 if (SCFG.makeUniformRegion(R, UA))
397 return false;
398 }
399 Function *F = R->getEntry()->getParent();
400 const TargetTransformInfo *TTI =
401 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
402 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
403 return SCFG.run(R, DT, TTI);
404 }
405
406 StringRef getPassName() const override { return "Structurize control flow"; }
407
408 void getAnalysisUsage(AnalysisUsage &AU) const override {
409 if (SkipUniformRegions)
414
417 }
418};
419
420} // end anonymous namespace
421
422char StructurizeCFGLegacyPass::ID = 0;
423
424INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
425 "Structurize the CFG", false, false)
429INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
430 "Structurize the CFG", false, false)
431
432/// Checks whether an instruction is zero cost instruction and checks if the
433/// operands are from different BB. If so, this instruction can be coalesced
434/// if its hoisted to predecessor block. So, this returns true.
435bool StructurizeCFG::isHoistableInstruction(Instruction *I, BasicBlock *BB,
436 BasicBlock *HoistTo) {
437 if (I->getParent() != BB || isa<PHINode>(I))
438 return false;
439
440 // If the instruction is not a zero cost instruction, return false.
441 auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
443 Cost.isValid()
444 ? Cost.getValue()
446 if (CostVal != 0)
447 return false;
448
449 // Check if all operands are available at the hoisting destination.
450 for (auto &Op : I->operands()) {
451 if (auto *OpI = dyn_cast<Instruction>(Op)) {
452 // Operand must dominate the hoisting destination.
453 if (!DT->dominates(OpI->getParent(), HoistTo))
454 return false;
455 }
456 }
457
458 return true;
459}
460
461/// Structurization can introduce unnecessary VGPR copies due to register
462/// coalescing interference. For example, if the Else block has a zero-cost
463/// instruction and the Then block modifies the VGPR value, only one value is
464/// live at a time in merge block before structurization. After structurization,
465/// the coalescer may incorrectly treat the Then value as live in the Else block
466/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
467///
468/// This function examines phi nodes whose incoming values are zero-cost
469/// instructions in the Else block. It identifies such values that can be safely
470/// hoisted and moves them to the nearest common dominator of Then and Else
471/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
472/// value to poison phi nodes along the if→flow edge, aiding register coalescing
473/// and minimizing unnecessary live ranges.
474void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
475 BasicBlock *ThenBB) {
476
477 BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
478 BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
479
480 if (!ElseSucc || !CommonDominator)
481 return;
482 Instruction *Term = CommonDominator->getTerminator();
483 for (PHINode &Phi : ElseSucc->phis()) {
484 Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
485 auto *Inst = dyn_cast<Instruction>(ElseVal);
486 if (!Inst || !isHoistableInstruction(Inst, ElseBB, CommonDominator))
487 continue;
488 Inst->removeFromParent();
489 Inst->insertInto(CommonDominator, Term->getIterator());
490 HoistedValues[Inst] = CommonDominator;
491 }
492}
493
494/// Build up the general order of nodes, by performing a topological sort of the
495/// parent region's nodes, while ensuring that there is no outer cycle node
496/// between any two inner cycle nodes.
497void StructurizeCFG::orderNodes() {
498 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
499 GraphTraits<Region *>::nodes_end(ParentRegion)));
500 if (Order.empty())
501 return;
502
503 SmallDenseSet<RegionNode *> Nodes;
504 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
505
506 // A list of range indices of SCCs in Order, to be processed.
508 unsigned I = 0, E = Order.size();
509 while (true) {
510 // Run through all the SCCs in the subgraph starting with Entry.
511 for (auto SCCI =
513 EntryNode);
514 !SCCI.isAtEnd(); ++SCCI) {
515 auto &SCC = *SCCI;
516
517 // An SCC up to the size of 2, can be reduced to an entry (the last node),
518 // and a possible additional node. Therefore, it is already in order, and
519 // there is no need to add it to the work-list.
520 unsigned Size = SCC.size();
521 if (Size > 2)
522 WorkList.emplace_back(I, I + Size);
523
524 // Add the SCC nodes to the Order array.
525 for (const auto &N : SCC) {
526 assert(I < E && "SCC size mismatch!");
527 Order[I++] = N.first;
528 }
529 }
530 assert(I == E && "SCC size mismatch!");
531
532 // If there are no more SCCs to order, then we are done.
533 if (WorkList.empty())
534 break;
535
536 std::tie(I, E) = WorkList.pop_back_val();
537
538 // Collect the set of nodes in the SCC's subgraph. These are only the
539 // possible child nodes; we do not add the entry (last node) otherwise we
540 // will have the same exact SCC all over again.
541 Nodes.clear();
542 Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
543
544 // Update the entry node.
545 EntryNode.first = Order[E - 1];
546 EntryNode.second = &Nodes;
547 }
548}
549
550/// Determine the end of the loops
551void StructurizeCFG::analyzeLoops(RegionNode *N) {
552 if (N->isSubRegion()) {
553 // Test for exit as back edge
554 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
555 if (Visited.count(Exit))
556 Loops[Exit] = N->getEntry();
557
558 } else {
559 // Test for successors as back edge
560 BasicBlock *BB = N->getNodeAs<BasicBlock>();
561 if (BranchInst *Term = dyn_cast<BranchInst>(BB->getTerminator()))
562 for (BasicBlock *Succ : Term->successors())
563 if (Visited.count(Succ))
564 Loops[Succ] = BB;
565 }
566}
567
568/// Build the condition for one edge
569PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
570 bool Invert) {
571 Value *Cond = Invert ? BoolFalse : BoolTrue;
572 MaybeCondBranchWeights Weights;
573
574 if (Term->isConditional()) {
575 Cond = Term->getCondition();
576 Weights = CondBranchWeights::tryParse(*Term);
577
578 if (Idx != (unsigned)Invert) {
580 if (Weights)
581 Weights = Weights->invert();
582 }
583 }
584 return {Cond, Weights};
585}
586
587/// Analyze the predecessors of each block and build up predicates
588void StructurizeCFG::gatherPredicates(RegionNode *N) {
589 RegionInfo *RI = ParentRegion->getRegionInfo();
590 BasicBlock *BB = N->getEntry();
591 BBPredicates &Pred = Predicates[BB];
592 BBPredicates &LPred = LoopPreds[BB];
593
594 for (BasicBlock *P : predecessors(BB)) {
595 // Ignore it if it's a branch from outside into our region entry
596 if (!ParentRegion->contains(P) || !dyn_cast<BranchInst>(P->getTerminator()))
597 continue;
598
599 Region *R = RI->getRegionFor(P);
600 if (R == ParentRegion) {
601 // It's a top level block in our region
602 BranchInst *Term = cast<BranchInst>(P->getTerminator());
603 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
604 BasicBlock *Succ = Term->getSuccessor(i);
605 if (Succ != BB)
606 continue;
607
608 if (Visited.count(P)) {
609 // Normal forward edge
610 if (Term->isConditional()) {
611 // Try to treat it like an ELSE block
612 BasicBlock *Other = Term->getSuccessor(!i);
613 if (Visited.count(Other) && !Loops.count(Other) &&
614 !Pred.count(Other) && !Pred.count(P)) {
615 hoistZeroCostElseBlockPhiValues(Succ, Other);
616 Pred[Other] = {BoolFalse, std::nullopt};
617 Pred[P] = {BoolTrue, std::nullopt};
618 continue;
619 }
620 }
621 Pred[P] = buildCondition(Term, i, false);
622 } else {
623 // Back edge
624 LPred[P] = buildCondition(Term, i, true);
625 }
626 }
627 } else {
628 // It's an exit from a sub region
629 while (R->getParent() != ParentRegion)
630 R = R->getParent();
631
632 // Edge from inside a subregion to its entry, ignore it
633 if (*R == *N)
634 continue;
635
636 BasicBlock *Entry = R->getEntry();
637 if (Visited.count(Entry))
638 Pred[Entry] = {BoolTrue, std::nullopt};
639 else
640 LPred[Entry] = {BoolFalse, std::nullopt};
641 }
642 }
643}
644
645/// Collect various loop and predicate infos
646void StructurizeCFG::collectInfos() {
647 // Reset predicate
648 Predicates.clear();
649
650 // and loop infos
651 Loops.clear();
652 LoopPreds.clear();
653
654 // Reset the visited nodes
655 Visited.clear();
656
657 for (RegionNode *RN : reverse(Order)) {
658 LLVM_DEBUG(dbgs() << "Visiting: "
659 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
660 << RN->getEntry()->getName() << "\n");
661
662 // Analyze all the conditions leading to a node
663 gatherPredicates(RN);
664
665 // Remember that we've seen this node
666 Visited.insert(RN->getEntry());
667
668 // Find the last back edges
669 analyzeLoops(RN);
670 }
671}
672
673/// Insert the missing branch conditions
674void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) {
675 BranchVector &Conds = Loops ? LoopConds : Conditions;
676 Value *Default = Loops ? BoolTrue : BoolFalse;
677
678 for (BranchInst *Term : Conds) {
679 assert(Term->isConditional());
680
681 BasicBlock *Parent = Term->getParent();
682 BasicBlock *SuccTrue = Term->getSuccessor(0);
683 BasicBlock *SuccFalse = Term->getSuccessor(1);
684
685 unsigned Variable = PhiInserter.AddVariable("", Boolean);
686 PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent,
687 Default);
688
689 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
690
691 NearestCommonDominator Dominator(DT);
692 Dominator.addBlock(Parent);
693
694 PredInfo ParentInfo{nullptr, std::nullopt};
695 for (auto [BB, PI] : Preds) {
696 if (BB == Parent) {
697 ParentInfo = PI;
698 break;
699 }
700 PhiInserter.AddAvailableValue(Variable, BB, PI.Pred);
701 Dominator.addAndRememberBlock(BB);
702 }
703
704 if (ParentInfo.Pred) {
705 Term->setCondition(ParentInfo.Pred);
706 CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
707 } else {
708 if (!Dominator.resultIsRememberedBlock())
709 PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default);
710
711 PhiInserter.AddUse(Variable, &Term->getOperandUse(0));
712 }
713 }
714}
715
716/// Simplify any inverted conditions that were built by buildConditions.
717void StructurizeCFG::simplifyConditions() {
718 SmallVector<Instruction *> InstToErase;
719 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
720 auto &Preds = I.second;
721 for (auto [BB, PI] : Preds) {
722 Instruction *Inverted;
723 if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
724 !PI.Pred->use_empty()) {
725 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
726 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
727 PI.Pred->replaceAllUsesWith(InvertedCmp);
728 InstToErase.push_back(cast<Instruction>(PI.Pred));
729 }
730 }
731 }
732 }
733 for (auto *I : InstToErase)
734 I->eraseFromParent();
735}
736
737/// Remove all PHI values coming from "From" into "To" and remember
738/// them in DeletedPhis
739void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
740 PhiMap &Map = DeletedPhis[To];
741 for (PHINode &Phi : To->phis()) {
742 bool Recorded = false;
743 while (Phi.getBasicBlockIndex(From) != -1) {
744 Value *Deleted = Phi.removeIncomingValue(From, false);
745 Map[&Phi].push_back(std::make_pair(From, Deleted));
746 if (!Recorded) {
747 AffectedPhis.push_back(&Phi);
748 Recorded = true;
749 }
750 }
751 }
752}
753
754/// Add a dummy PHI value as soon as we knew the new predecessor
755void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
756 for (PHINode &Phi : To->phis()) {
757 Value *Poison = PoisonValue::get(Phi.getType());
758 Phi.addIncoming(Poison, From);
759 }
760 AddedPhis[To].push_back(From);
761}
762
763/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
764/// from predecessors \p Incomings, we have a chance to mark the available value
765/// from some blocks as undefined. The function will find out all such blocks
766/// and return in \p UndefBlks.
767void StructurizeCFG::findUndefBlocks(
768 BasicBlock *PHIBlock, const SmallPtrSet<BasicBlock *, 8> &Incomings,
769 SmallVector<BasicBlock *> &UndefBlks) const {
770 // We may get a post-structured CFG like below:
771 //
772 // | P1
773 // |/
774 // F1
775 // |\
776 // | N
777 // |/
778 // F2
779 // |\
780 // | P2
781 // |/
782 // F3
783 // |\
784 // B
785 //
786 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
787 // of B before structurization. F1/F2/F3 are flow blocks inserted during
788 // structurization process. Block N is not a predecessor of B before
789 // structurization, but are placed between the predecessors(P1/P2) of B after
790 // structurization. This usually means that threads went to N never take the
791 // path N->F2->F3->B. For example, the threads take the branch F1->N may
792 // always take the branch F2->P2. So, when we are reconstructing a PHI
793 // originally in B, we can safely say the incoming value from N is undefined.
794 SmallPtrSet<BasicBlock *, 8> VisitedBlock;
795 SmallVector<BasicBlock *, 8> Stack;
796 if (PHIBlock == ParentRegion->getExit()) {
797 for (auto P : predecessors(PHIBlock)) {
798 if (ParentRegion->contains(P))
799 Stack.push_back(P);
800 }
801 } else {
802 append_range(Stack, predecessors(PHIBlock));
803 }
804
805 // Do a backward traversal over the CFG, and stop further searching if
806 // the block is not a Flow. If a block is neither flow block nor the
807 // incoming predecessor, then the incoming value from the block is
808 // undefined value for the PHI being reconstructed.
809 while (!Stack.empty()) {
810 BasicBlock *Current = Stack.pop_back_val();
811 if (!VisitedBlock.insert(Current).second)
812 continue;
813
814 if (FlowSet.contains(Current))
815 llvm::append_range(Stack, predecessors(Current));
816 else if (!Incomings.contains(Current))
817 UndefBlks.push_back(Current);
818 }
819}
820
821// If two phi nodes have compatible incoming values (for each
822// incoming block, either they have the same incoming value or only one phi
823// node has an incoming value), let them share the merged incoming values. The
824// merge process is guided by the equivalence information from \p PhiClasses.
825// The function will possibly update the incoming values of leader phi in
826// DeletedPhis.
827void StructurizeCFG::mergeIfCompatible(
828 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
829 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A));
830 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B));
831 // They are already in the same class, no work needed.
832 if (ItA == ItB)
833 return;
834
835 PHINode *LeaderA = *ItA;
836 PHINode *LeaderB = *ItB;
837 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
838 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
839
840 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
841 for (auto [BB, V] : IncomingB) {
842 auto BBIt = Mergeable.find(BB);
843 if (BBIt != Mergeable.end() && BBIt->second != V)
844 return;
845 // Either IncomingA does not have this value or IncomingA has the same
846 // value.
847 Mergeable.insert({BB, V});
848 }
849
850 // Update the incoming value of leaderA.
851 IncomingA.assign(Mergeable.begin(), Mergeable.end());
852 PhiClasses.unionSets(ItA, ItB);
853}
854
855/// Add the real PHI value as soon as everything is set up
856void StructurizeCFG::setPhiValues() {
857 SmallVector<PHINode *, 8> InsertedPhis;
858 SSAUpdater Updater(&InsertedPhis);
859 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
860
861 // Find phi nodes that have compatible incoming values (either they have
862 // the same value for the same block or only one phi node has an incoming
863 // value, see example below). We only search again the phi's that are
864 // referenced by another phi, which is the case we care about.
865 //
866 // For example (-- means no incoming value):
867 // phi1 : BB1:phi2 BB2:v BB3:--
868 // phi2: BB1:-- BB2:v BB3:w
869 //
870 // Then we can merge these incoming values and let phi1, phi2 use the
871 // same set of incoming values:
872 //
873 // phi1&phi2: BB1:phi2 BB2:v BB3:w
874 //
875 // By doing this, phi1 and phi2 would share more intermediate phi nodes.
876 // This would help reduce the number of phi nodes during SSA reconstruction
877 // and ultimately result in fewer COPY instructions.
878 //
879 // This should be correct, because if a phi node does not have incoming
880 // value from certain block, this means the block is not the predecessor
881 // of the parent block, so we actually don't care about its incoming value.
882 EquivalenceClasses<PHINode *> PhiClasses;
883 for (const auto &[To, From] : AddedPhis) {
884 auto OldPhiIt = DeletedPhis.find(To);
885 if (OldPhiIt == DeletedPhis.end())
886 continue;
887
888 PhiMap &BlkPhis = OldPhiIt->second;
889 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
890 SmallPtrSet<BasicBlock *, 8> Incomings;
891
892 // Get the undefined blocks shared by all the phi nodes.
893 if (!BlkPhis.empty()) {
894 Incomings.insert_range(llvm::make_first_range(BlkPhis.front().second));
895 findUndefBlocks(To, Incomings, UndefBlks);
896 }
897
898 for (const auto &[Phi, Incomings] : OldPhiIt->second) {
899 SmallVector<PHINode *> IncomingPHIs;
900 for (const auto &[BB, V] : Incomings) {
901 // First, for each phi, check whether it has incoming value which is
902 // another phi.
903 if (PHINode *P = dyn_cast<PHINode>(V))
904 IncomingPHIs.push_back(P);
905 }
906
907 for (auto *OtherPhi : IncomingPHIs) {
908 // Skip phis that are unrelated to the phi reconstruction for now.
909 if (!DeletedPhis.contains(OtherPhi->getParent()))
910 continue;
911 mergeIfCompatible(PhiClasses, Phi, OtherPhi);
912 }
913 }
914 }
915
916 for (const auto &AddedPhi : AddedPhis) {
917 BasicBlock *To = AddedPhi.first;
918 const BBVector &From = AddedPhi.second;
919
920 auto It = DeletedPhis.find(To);
921 if (It == DeletedPhis.end())
922 continue;
923
924 PhiMap &Map = It->second;
925 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
926 for (const auto &[Phi, Incoming] : Map) {
927 Value *Poison = PoisonValue::get(Phi->getType());
928 Updater.Initialize(Phi->getType(), "");
929 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
930 Updater.AddAvailableValue(To, Poison);
931
932 // Use leader phi's incoming if there is.
933 auto LeaderIt = PhiClasses.findLeader(Phi);
934 bool UseIncomingOfLeader =
935 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
936 const auto &IncomingMap =
937 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
938 : Incoming;
939
940 SmallVector<BasicBlock *> ConstantPreds;
941 for (const auto &[BB, V] : IncomingMap) {
942 Updater.AddAvailableValue(BB, V);
943 if (isa<Constant>(V))
944 ConstantPreds.push_back(BB);
945 }
946
947 for (auto UB : UndefBlks) {
948 // If this undef block is dominated by any predecessor(before
949 // structurization) of reconstructed PHI with constant incoming value,
950 // don't mark the available value as undefined. Setting undef to such
951 // block will stop us from getting optimal phi insertion.
952 if (any_of(ConstantPreds,
953 [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
954 continue;
955 // Maybe already get a value through sharing with other phi nodes.
956 if (Updater.HasValueForBlock(UB))
957 continue;
958
959 Updater.AddAvailableValue(UB, Poison);
960 }
961
962 for (BasicBlock *FI : From)
963 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
964 AffectedPhis.push_back(Phi);
965 }
966 }
967
968 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
969}
970
971/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
972/// entries on Flow nodes with the appropriate hoisted values
973void StructurizeCFG::simplifyHoistedPhis() {
974 for (WeakVH VH : AffectedPhis) {
975 PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
976 if (!Phi || Phi->getNumIncomingValues() != 2)
977 continue;
978
979 for (int i = 0; i < 2; i++) {
980 Value *V = Phi->getIncomingValue(i);
981 auto BBIt = HoistedValues.find(V);
982
983 if (BBIt == HoistedValues.end())
984 continue;
985
986 Value *OtherV = Phi->getIncomingValue(!i);
987 PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
988 if (!OtherPhi)
989 continue;
990
991 int PoisonValBBIdx = -1;
992 for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
993 if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
994 continue;
995 PoisonValBBIdx = i;
996 break;
997 }
998 if (PoisonValBBIdx == -1 ||
999 !DT->dominates(BBIt->second,
1000 OtherPhi->getIncomingBlock(PoisonValBBIdx)))
1001 continue;
1002
1003 OtherPhi->setIncomingValue(PoisonValBBIdx, V);
1004 if (DT->dominates(OtherV, Phi))
1005 Phi->setIncomingValue(i, OtherV);
1006 }
1007 }
1008}
1009
1010void StructurizeCFG::simplifyAffectedPhis() {
1011 bool Changed;
1012 do {
1013 Changed = false;
1014 SimplifyQuery Q(Func->getDataLayout());
1015 Q.DT = DT;
1016 // Setting CanUseUndef to true might extend value liveness, set it to false
1017 // to achieve better register pressure.
1018 Q.CanUseUndef = false;
1019 for (WeakVH VH : AffectedPhis) {
1020 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
1021 if (auto NewValue = simplifyInstruction(Phi, Q)) {
1022 Phi->replaceAllUsesWith(NewValue);
1023 Phi->eraseFromParent();
1024 Changed = true;
1025 }
1026 }
1027 }
1028 } while (Changed);
1029}
1030
1031/// Remove phi values from all successors and then remove the terminator.
1032DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) {
1034 if (!Term)
1035 return DebugLoc();
1036
1037 for (BasicBlock *Succ : successors(BB))
1038 delPhiValues(BB, Succ);
1039
1040 DebugLoc DL = Term->getDebugLoc();
1041 Term->eraseFromParent();
1042 return DL;
1043}
1044
1045/// Let node exit(s) point to NewExit
1046void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
1047 bool IncludeDominator) {
1048 if (Node->isSubRegion()) {
1049 Region *SubRegion = Node->getNodeAs<Region>();
1050 BasicBlock *OldExit = SubRegion->getExit();
1051 BasicBlock *Dominator = nullptr;
1052
1053 // Find all the edges from the sub region to the exit.
1054 // We use make_early_inc_range here because we modify BB's terminator.
1055 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
1056 if (!SubRegion->contains(BB))
1057 continue;
1058
1059 // Modify the edges to point to the new exit
1060 delPhiValues(BB, OldExit);
1061 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
1062 addPhiValues(BB, NewExit);
1063
1064 // Find the new dominator (if requested)
1065 if (IncludeDominator) {
1066 if (!Dominator)
1067 Dominator = BB;
1068 else
1069 Dominator = DT->findNearestCommonDominator(Dominator, BB);
1070 }
1071 }
1072
1073 // Change the dominator (if requested)
1074 if (Dominator)
1075 DT->changeImmediateDominator(NewExit, Dominator);
1076
1077 // Update the region info
1078 SubRegion->replaceExit(NewExit);
1079 } else {
1080 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
1081 DebugLoc DL = killTerminator(BB);
1082 BranchInst *Br = BranchInst::Create(NewExit, BB);
1083 Br->setDebugLoc(DL);
1084 addPhiValues(BB, NewExit);
1085 if (IncludeDominator)
1086 DT->changeImmediateDominator(NewExit, BB);
1087 }
1088}
1089
1090/// Create a new flow node and update dominator tree and region info
1091BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
1092 LLVMContext &Context = Func->getContext();
1093 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
1094 Order.back()->getEntry();
1096 Func, Insert);
1097 FlowSet.insert(Flow);
1098 DT->addNewBlock(Flow, Dominator);
1099 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
1100 return Flow;
1101}
1102
1103/// Create a new or reuse the previous node as flow node. Returns a block and a
1104/// debug location to be used for new instructions in that block.
1105std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) {
1106 BasicBlock *Entry = PrevNode->getEntry();
1107
1108 if (!PrevNode->isSubRegion()) {
1109 DebugLoc DL = killTerminator(Entry);
1110 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1111 return {Entry, DL};
1112 }
1113
1114 // create a new flow node
1115 BasicBlock *Flow = getNextFlow(Entry);
1116
1117 // and wire it up
1118 changeExit(PrevNode, Flow, true);
1119 PrevNode = ParentRegion->getBBNode(Flow);
1120 return {Flow, DebugLoc()};
1121}
1122
1123/// Returns the region exit if possible, otherwise just a new flow node
1124BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1125 bool ExitUseAllowed) {
1126 if (!Order.empty() || !ExitUseAllowed)
1127 return getNextFlow(Flow);
1128
1129 BasicBlock *Exit = ParentRegion->getExit();
1130 DT->changeImmediateDominator(Exit, Flow);
1131 addPhiValues(Flow, Exit);
1132 return Exit;
1133}
1134
1135/// Set the previous node
1136void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1137 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1138 : nullptr;
1139}
1140
1141/// Does BB dominate all the predicates of Node?
1142bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1143 BBPredicates &Preds = Predicates[Node->getEntry()];
1144 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) {
1145 return DT->dominates(BB, Pred.first);
1146 });
1147}
1148
1149/// Can we predict that this node will always be called?
1150bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1151 BBPredicates &Preds = Predicates[Node->getEntry()];
1152 bool Dominated = false;
1153
1154 // Regionentry is always true
1155 if (!PrevNode)
1156 return true;
1157
1158 for (auto [BB, PI] : Preds) {
1159 if (PI.Pred != BoolTrue)
1160 return false;
1161
1162 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
1163 Dominated = true;
1164 }
1165
1166 // TODO: The dominator check is too strict
1167 return Dominated;
1168}
1169
1170/// Take one node from the order vector and wire it up
1171void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1172 BasicBlock *LoopEnd) {
1173 RegionNode *Node = Order.pop_back_val();
1174 Visited.insert(Node->getEntry());
1175
1176 if (isPredictableTrue(Node)) {
1177 // Just a linear flow
1178 if (PrevNode) {
1179 changeExit(PrevNode, Node->getEntry(), true);
1180 }
1181 PrevNode = Node;
1182 } else {
1183 // Insert extra prefix node (or reuse last one)
1184 auto [Flow, DL] = needPrefix(false);
1185
1186 // Insert extra postfix node (or use exit instead)
1187 BasicBlock *Entry = Node->getEntry();
1188 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1189
1190 // let it point to entry and next block
1191 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
1192 Br->setDebugLoc(DL);
1193 Conditions.push_back(Br);
1194 addPhiValues(Flow, Entry);
1195 DT->changeImmediateDominator(Entry, Flow);
1196
1197 PrevNode = Node;
1198 while (!Order.empty() && !Visited.count(LoopEnd) &&
1199 dominatesPredicates(Entry, Order.back())) {
1200 handleLoops(false, LoopEnd);
1201 }
1202
1203 changeExit(PrevNode, Next, false);
1204 setPrevNode(Next);
1205 }
1206}
1207
1208void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1209 BasicBlock *LoopEnd) {
1210 RegionNode *Node = Order.back();
1211 BasicBlock *LoopStart = Node->getEntry();
1212
1213 if (!Loops.count(LoopStart)) {
1214 wireFlow(ExitUseAllowed, LoopEnd);
1215 return;
1216 }
1217
1218 if (!isPredictableTrue(Node))
1219 LoopStart = needPrefix(true).first;
1220
1221 LoopEnd = Loops[Node->getEntry()];
1222 wireFlow(false, LoopEnd);
1223 while (!Visited.count(LoopEnd)) {
1224 handleLoops(false, LoopEnd);
1225 }
1226
1227 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1228
1229 // Create an extra loop end node
1230 DebugLoc DL;
1231 std::tie(LoopEnd, DL) = needPrefix(false);
1232 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1233 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1234 Br->setDebugLoc(DL);
1235 LoopConds.push_back(Br);
1236 addPhiValues(LoopEnd, LoopStart);
1237 setPrevNode(Next);
1238}
1239
1240/// After this function control flow looks like it should be, but
1241/// branches and PHI nodes only have undefined conditions.
1242void StructurizeCFG::createFlow() {
1243 BasicBlock *Exit = ParentRegion->getExit();
1244 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1245
1246 AffectedPhis.clear();
1247 DeletedPhis.clear();
1248 AddedPhis.clear();
1249 Conditions.clear();
1250 LoopConds.clear();
1251
1252 PrevNode = nullptr;
1253 Visited.clear();
1254
1255 while (!Order.empty()) {
1256 handleLoops(EntryDominatesExit, nullptr);
1257 }
1258
1259 if (PrevNode)
1260 changeExit(PrevNode, Exit, EntryDominatesExit);
1261 else
1262 assert(EntryDominatesExit);
1263}
1264
1265/// Handle a rare case where the disintegrated nodes instructions
1266/// no longer dominate all their uses. Not sure if this is really necessary
1267void StructurizeCFG::rebuildSSA() {
1268 SSAUpdater Updater;
1269 for (BasicBlock *BB : ParentRegion->blocks())
1270 for (Instruction &I : *BB) {
1271 bool Initialized = false;
1272 // We may modify the use list as we iterate over it, so we use
1273 // make_early_inc_range.
1274 for (Use &U : llvm::make_early_inc_range(I.uses())) {
1275 Instruction *User = cast<Instruction>(U.getUser());
1276 if (User->getParent() == BB) {
1277 continue;
1278 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1279 if (UserPN->getIncomingBlock(U) == BB)
1280 continue;
1281 }
1282
1283 if (DT->dominates(&I, User))
1284 continue;
1285
1286 if (!Initialized) {
1287 Value *Poison = PoisonValue::get(I.getType());
1288 Updater.Initialize(I.getType(), "");
1289 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
1290 Updater.AddAvailableValue(BB, &I);
1291 Initialized = true;
1292 }
1293 Updater.RewriteUseAfterInsertions(U);
1294 }
1295 }
1296}
1297
1298static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1299 const UniformityInfo &UA) {
1300 // Bool for if all sub-regions are uniform.
1301 bool SubRegionsAreUniform = true;
1302 // Count of how many direct children are conditional.
1303 unsigned ConditionalDirectChildren = 0;
1304
1305 for (auto *E : R->elements()) {
1306 if (!E->isSubRegion()) {
1307 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1308 if (!Br || !Br->isConditional())
1309 continue;
1310
1311 if (!UA.isUniform(Br))
1312 return false;
1313
1314 // One of our direct children is conditional.
1315 ConditionalDirectChildren++;
1316
1317 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1318 << " has uniform terminator\n");
1319 } else {
1320 // Explicitly refuse to treat regions as uniform if they have non-uniform
1321 // subregions. We cannot rely on UniformityAnalysis for branches in
1322 // subregions because those branches may have been removed and re-created,
1323 // so we look for our metadata instead.
1324 //
1325 // Warning: It would be nice to treat regions as uniform based only on
1326 // their direct child basic blocks' terminators, regardless of whether
1327 // subregions are uniform or not. However, this requires a very careful
1328 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1329 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1330 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1331 if (!Br || !Br->isConditional())
1332 continue;
1333
1334 if (!Br->getMetadata(UniformMDKindID)) {
1335 // Early exit if we cannot have relaxed uniform regions.
1336 if (!RelaxedUniformRegions)
1337 return false;
1338
1339 SubRegionsAreUniform = false;
1340 break;
1341 }
1342 }
1343 }
1344 }
1345
1346 // Our region is uniform if:
1347 // 1. All conditional branches that are direct children are uniform (checked
1348 // above).
1349 // 2. And either:
1350 // a. All sub-regions are uniform.
1351 // b. There is one or less conditional branches among the direct children.
1352 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1353}
1354
1355void StructurizeCFG::init(Region *R) {
1356 LLVMContext &Context = R->getEntry()->getContext();
1357
1358 Boolean = Type::getInt1Ty(Context);
1359 BoolTrue = ConstantInt::getTrue(Context);
1360 BoolFalse = ConstantInt::getFalse(Context);
1361 BoolPoison = PoisonValue::get(Boolean);
1362
1363 this->UA = nullptr;
1364}
1365
1366bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1367 if (R->isTopLevelRegion())
1368 return false;
1369
1370 this->UA = &UA;
1371
1372 // TODO: We could probably be smarter here with how we handle sub-regions.
1373 // We currently rely on the fact that metadata is set by earlier invocations
1374 // of the pass on sub-regions, and that this metadata doesn't get lost --
1375 // but we shouldn't rely on metadata for correctness!
1376 unsigned UniformMDKindID =
1377 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1378
1379 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1380 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1381 << '\n');
1382
1383 // Mark all direct child block terminators as having been treated as
1384 // uniform. To account for a possible future in which non-uniform
1385 // sub-regions are treated more cleverly, indirect children are not
1386 // marked as uniform.
1387 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1388 for (RegionNode *E : R->elements()) {
1389 if (E->isSubRegion())
1390 continue;
1391
1392 if (Instruction *Term = E->getEntry()->getTerminator())
1393 Term->setMetadata(UniformMDKindID, MD);
1394 }
1395
1396 return true;
1397 }
1398 return false;
1399}
1400
1401/// Run the transformation for each region found
1402bool StructurizeCFG::run(Region *R, DominatorTree *DT,
1403 const TargetTransformInfo *TTI) {
1404 // CallBr and its corresponding direct target blocks are for now ignored by
1405 // this pass. This is not a limitation for the currently intended uses cases
1406 // of callbr in the AMDGPU backend.
1407 // Parent and child regions are not affected by this (current) restriction.
1408 // See `llvm/test/Transforms/StructurizeCFG/callbr.ll` for details.
1409 if (R->isTopLevelRegion() || isa<CallBrInst>(R->getEntry()->getTerminator()))
1410 return false;
1411
1412 this->DT = DT;
1413 this->TTI = TTI;
1414 Func = R->getEntry()->getParent();
1415
1416 ParentRegion = R;
1417
1418 orderNodes();
1419 collectInfos();
1420 createFlow();
1421
1422 SSAUpdaterBulk PhiInserter;
1423 insertConditions(false, PhiInserter);
1424 insertConditions(true, PhiInserter);
1425 PhiInserter.RewriteAndOptimizeAllUses(*DT);
1426
1427 setPhiValues();
1428 simplifyHoistedPhis();
1429 simplifyConditions();
1430 simplifyAffectedPhis();
1431 rebuildSSA();
1432
1433 // Cleanup
1434 Order.clear();
1435 Visited.clear();
1436 DeletedPhis.clear();
1437 AddedPhis.clear();
1438 Predicates.clear();
1439 Conditions.clear();
1440 Loops.clear();
1441 LoopPreds.clear();
1442 LoopConds.clear();
1443 FlowSet.clear();
1444
1445 return true;
1446}
1447
1448Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1449 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1450}
1451
1452static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1453 Regions.push_back(&R);
1454 for (const auto &E : R)
1455 addRegionIntoQueue(*E, Regions);
1456}
1457
1459 : SkipUniformRegions(SkipUniformRegions_) {
1460 if (ForceSkipUniformRegions.getNumOccurrences())
1461 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1462}
1463
1465 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1467 OS, MapClassName2PassName);
1468 if (SkipUniformRegions)
1469 OS << "<skip-uniform-regions>";
1470}
1471
1474
1475 bool Changed = false;
1477 auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1479 UniformityInfo *UI = nullptr;
1480 if (SkipUniformRegions)
1482
1483 std::vector<Region *> Regions;
1484 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1485 while (!Regions.empty()) {
1486 Region *R = Regions.back();
1487 Regions.pop_back();
1488
1489 StructurizeCFG SCFG;
1490 SCFG.init(R);
1491
1492 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1493 Changed = true; // May have added metadata.
1494 continue;
1495 }
1496
1497 Changed |= SCFG.run(R, DT, TTI);
1498 }
1499 if (!Changed)
1500 return PreservedAnalyses::all();
1503 return PA;
1504}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DenseMap< BasicBlock *, Instruction * > BBPredicates
This file defines the DenseMap class.
@ Default
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
#define T
#define P(N)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static void addRegionIntoQueue(Region &R, std::deque< Region * > &RQ)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
Annotate SI Control Flow
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
SmallDenseMap< const PHINode *, PhiInfo, 16 > PhiMap
const char FlowBlockName[]
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, const UniformityInfo &UA)
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
LLVM IR instance of the generic uniformity analysis.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:109
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:174
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
const ECValue & insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
member_iterator member_end() const
member_iterator findLeader(const ElemTy &V) const
findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
const BasicBlock & getEntryBlock() const
Definition Function.h:807
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isUniform(ConstValueRefT V) const
Whether V is uniform/non-divergent.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
The pass manager to schedule RegionPasses.
Definition RegionPass.h:88
void replaceExit(BlockT *BB)
Replace the exit basic block of the region with the new basic block.
block_range blocks()
Returns a range view of the basic blocks in the region.
Definition RegionInfo.h:620
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition RegionInfo.h:357
RegionNodeT * getBBNode(BlockT *BB) const
Get the BasicBlock RegionNode for a BasicBlock.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionInfoT * getRegionInfo() const
Return the RegionInfo object, that belongs to this Region.
Definition RegionInfo.h:424
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition RegionInfo.h:965
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
bool isSubRegion() const
Is this RegionNode a subregion?
Definition RegionInfo.h:186
BlockT * getEntry() const
Get the entry BasicBlock of this RegionNode.
Definition RegionInfo.h:172
A pass that runs on each Region in a function.
Definition RegionPass.h:33
Helper class for SSA formation on a set of values defined in multiple blocks.
LLVM_ABI unsigned AddVariable(StringRef Name, Type *Ty)
Add a new variable to the SSA rewriter.
LLVM_ABI void AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
LLVM_ABI void AddUse(unsigned Var, Use *U)
Record a use of the symbolic value.
void RewriteAndOptimizeAllUses(DominatorTree &DT)
Rewrite all uses and simplify the inserted PHI nodes.
void RewriteUseAfterInsertions(Use &U)
Rewrite a use like RewriteUse but handling in-block definitions.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:291
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
LLVM Value Representation.
Definition Value.h:75
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
CRTP base class for adapting an iterator to a different type.
Definition iterator.h:237
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static scc_iterator begin(const GraphT &G)
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Changed
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< SSAContext > UniformityInfo
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
unsigned char Boolean
Definition ConvertUTF.h:132
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2236
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
LLVM_ABI Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:550
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Value * invertCondition(Value *Condition)
Invert the given true/false value, possibly reusing an existing copy.
Definition Local.cpp:3959
filter_iterator_impl< WrappedIteratorT, PredicateT, detail::fwd_or_bidi_tag< WrappedIteratorT > > filter_iterator
Defines filter_iterator to a suitable specialization of filter_iterator_impl, based on the underlying...
Definition STLExtras.h:537
LLVM_ABI void initializeStructurizeCFGLegacyPassPass(PassRegistry &)
#define N
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
StructurizeCFGPass(bool SkipUniformRegions=false)