LLVM 22.0.0git
StructurizeCFG.cpp
Go to the documentation of this file.
1//===- StructurizeCFG.cpp -------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "llvm/ADT/DenseMap.h"
12#include "llvm/ADT/MapVector.h"
14#include "llvm/ADT/STLExtras.h"
23#include "llvm/IR/BasicBlock.h"
24#include "llvm/IR/CFG.h"
25#include "llvm/IR/Constants.h"
26#include "llvm/IR/Dominators.h"
27#include "llvm/IR/Function.h"
28#include "llvm/IR/InstrTypes.h"
29#include "llvm/IR/Instruction.h"
31#include "llvm/IR/Metadata.h"
32#include "llvm/IR/PassManager.h"
35#include "llvm/IR/Type.h"
36#include "llvm/IR/Use.h"
37#include "llvm/IR/Value.h"
38#include "llvm/IR/ValueHandle.h"
40#include "llvm/Pass.h"
43#include "llvm/Support/Debug.h"
51#include <cassert>
52#include <utility>
53
54using namespace llvm;
55using namespace llvm::PatternMatch;
56
57#define DEBUG_TYPE "structurizecfg"
58
59// The name for newly created blocks.
60const char FlowBlockName[] = "Flow";
61
62namespace {
63
64static cl::opt<bool> ForceSkipUniformRegions(
65 "structurizecfg-skip-uniform-regions",
67 cl::desc("Force whether the StructurizeCFG pass skips uniform regions"),
68 cl::init(false));
69
70static cl::opt<bool>
71 RelaxedUniformRegions("structurizecfg-relaxed-uniform-regions", cl::Hidden,
72 cl::desc("Allow relaxed uniform region checks"),
73 cl::init(true));
74
75// Definition of the complex types used in this pass.
76
77using BBValuePair = std::pair<BasicBlock *, Value *>;
78
79using RNVector = SmallVector<RegionNode *, 8>;
80using BBVector = SmallVector<BasicBlock *, 8>;
81using BranchVector = SmallVector<BranchInst *, 8>;
82using BBValueVector = SmallVector<BBValuePair, 2>;
83
85
87using BB2BBVecMap = MapVector<BasicBlock *, BBVector>;
88
89using BBPhiMap = DenseMap<BasicBlock *, PhiMap>;
90
91using MaybeCondBranchWeights = std::optional<class CondBranchWeights>;
92
93class CondBranchWeights {
94 uint32_t TrueWeight;
95 uint32_t FalseWeight;
96
97 CondBranchWeights(uint32_t T, uint32_t F) : TrueWeight(T), FalseWeight(F) {}
98
99public:
100 static MaybeCondBranchWeights tryParse(const BranchInst &Br) {
101 assert(Br.isConditional());
102
103 uint64_t T, F;
104 if (!extractBranchWeights(Br, T, F))
105 return std::nullopt;
106
107 return CondBranchWeights(T, F);
108 }
109
110 static void setMetadata(BranchInst &Br,
111 const MaybeCondBranchWeights &Weights) {
112 assert(Br.isConditional());
113 if (!Weights)
114 return;
115 uint32_t Arr[] = {Weights->TrueWeight, Weights->FalseWeight};
116 setBranchWeights(Br, Arr, false);
117 }
118
119 CondBranchWeights invert() const {
120 return CondBranchWeights{FalseWeight, TrueWeight};
121 }
122};
123
124struct PredInfo {
125 Value *Pred;
126 MaybeCondBranchWeights Weights;
127};
128
132using Val2BBMap = DenseMap<Value *, BasicBlock *>;
133
134// A traits type that is intended to be used in graph algorithms. The graph
135// traits starts at an entry node, and traverses the RegionNodes that are in
136// the Nodes set.
137struct SubGraphTraits {
138 using NodeRef = std::pair<RegionNode *, SmallDenseSet<RegionNode *> *>;
139 using BaseSuccIterator = GraphTraits<RegionNode *>::ChildIteratorType;
140
141 // This wraps a set of Nodes into the iterator, so we know which edges to
142 // filter out.
143 class WrappedSuccIterator
144 : public iterator_adaptor_base<
145 WrappedSuccIterator, BaseSuccIterator,
146 typename std::iterator_traits<BaseSuccIterator>::iterator_category,
147 NodeRef, std::ptrdiff_t, NodeRef *, NodeRef> {
149
150 public:
151 WrappedSuccIterator(BaseSuccIterator It, SmallDenseSet<RegionNode *> *Nodes)
152 : iterator_adaptor_base(It), Nodes(Nodes) {}
153
154 NodeRef operator*() const { return {*I, Nodes}; }
155 };
156
157 static bool filterAll(const NodeRef &N) { return true; }
158 static bool filterSet(const NodeRef &N) { return N.second->count(N.first); }
159
160 using ChildIteratorType =
161 filter_iterator<WrappedSuccIterator, bool (*)(const NodeRef &)>;
162
163 static NodeRef getEntryNode(Region *R) {
164 return {GraphTraits<Region *>::getEntryNode(R), nullptr};
165 }
166
167 static NodeRef getEntryNode(NodeRef N) { return N; }
168
169 static iterator_range<ChildIteratorType> children(const NodeRef &N) {
170 auto *filter = N.second ? &filterSet : &filterAll;
171 return make_filter_range(
174 {GraphTraits<RegionNode *>::child_end(N.first), N.second}),
175 filter);
176 }
177
178 static ChildIteratorType child_begin(const NodeRef &N) {
179 return children(N).begin();
180 }
181
182 static ChildIteratorType child_end(const NodeRef &N) {
183 return children(N).end();
184 }
185};
186
187/// Finds the nearest common dominator of a set of BasicBlocks.
188///
189/// For every BB you add to the set, you can specify whether we "remember" the
190/// block. When you get the common dominator, you can also ask whether it's one
191/// of the blocks we remembered.
192class NearestCommonDominator {
193 DominatorTree *DT;
194 BasicBlock *Result = nullptr;
195 bool ResultIsRemembered = false;
196
197 /// Add BB to the resulting dominator.
198 void addBlock(BasicBlock *BB, bool Remember) {
199 if (!Result) {
200 Result = BB;
201 ResultIsRemembered = Remember;
202 return;
203 }
204
205 BasicBlock *NewResult = DT->findNearestCommonDominator(Result, BB);
206 if (NewResult != Result)
207 ResultIsRemembered = false;
208 if (NewResult == BB)
209 ResultIsRemembered |= Remember;
210 Result = NewResult;
211 }
212
213public:
214 explicit NearestCommonDominator(DominatorTree *DomTree) : DT(DomTree) {}
215
216 void addBlock(BasicBlock *BB) {
217 addBlock(BB, /* Remember = */ false);
218 }
219
220 void addAndRememberBlock(BasicBlock *BB) {
221 addBlock(BB, /* Remember = */ true);
222 }
223
224 /// Get the nearest common dominator of all the BBs added via addBlock() and
225 /// addAndRememberBlock().
226 BasicBlock *result() { return Result; }
227
228 /// Is the BB returned by getResult() one of the blocks we added to the set
229 /// with addAndRememberBlock()?
230 bool resultIsRememberedBlock() { return ResultIsRemembered; }
231};
232
233/// Transforms the control flow graph on one single entry/exit region
234/// at a time.
235///
236/// After the transform all "If"/"Then"/"Else" style control flow looks like
237/// this:
238///
239/// \verbatim
240/// 1
241/// ||
242/// | |
243/// 2 |
244/// | /
245/// |/
246/// 3
247/// || Where:
248/// | | 1 = "If" block, calculates the condition
249/// 4 | 2 = "Then" subregion, runs if the condition is true
250/// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
251/// |/ 4 = "Else" optional subregion, runs if the condition is false
252/// 5 5 = "End" block, also rejoins the control flow
253/// \endverbatim
254///
255/// Control flow is expressed as a branch where the true exit goes into the
256/// "Then"/"Else" region, while the false exit skips the region
257/// The condition for the optional "Else" region is expressed as a PHI node.
258/// The incoming values of the PHI node are true for the "If" edge and false
259/// for the "Then" edge.
260///
261/// Additionally to that even complicated loops look like this:
262///
263/// \verbatim
264/// 1
265/// ||
266/// | |
267/// 2 ^ Where:
268/// | / 1 = "Entry" block
269/// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block
270/// 3 3 = "Flow" block, with back edge to entry block
271/// |
272/// \endverbatim
273///
274/// The back edge of the "Flow" block is always on the false side of the branch
275/// while the true side continues the general flow. So the loop condition
276/// consist of a network of PHI nodes where the true incoming values expresses
277/// breaks and the false values expresses continue states.
278
279class StructurizeCFG {
280 Type *Boolean;
281 ConstantInt *BoolTrue;
282 ConstantInt *BoolFalse;
283 Value *BoolPoison;
285 Function *Func;
286 Region *ParentRegion;
287
288 UniformityInfo *UA = nullptr;
289 DominatorTree *DT;
290
292 BBSet Visited;
293 BBSet FlowSet;
294
295 SmallVector<WeakVH, 8> AffectedPhis;
296 BBPhiMap DeletedPhis;
297 BB2BBVecMap AddedPhis;
298
299 PredMap Predicates;
300 BranchVector Conditions;
301
302 BB2BBMap Loops;
303 PredMap LoopPreds;
304 BranchVector LoopConds;
305
306 Val2BBMap HoistedValues;
307
308 RegionNode *PrevNode;
309
310 void hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB, BasicBlock *ThenBB);
311
312 bool isHoistableInstruction(Instruction *I, BasicBlock *BB,
313 BasicBlock *HoistTo);
314
315 void orderNodes();
316
317 void analyzeLoops(RegionNode *N);
318
319 PredInfo buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
320
321 void gatherPredicates(RegionNode *N);
322
323 void collectInfos();
324
325 void insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter);
326
327 void simplifyConditions();
328
329 void delPhiValues(BasicBlock *From, BasicBlock *To);
330
331 void addPhiValues(BasicBlock *From, BasicBlock *To);
332
333 void findUndefBlocks(BasicBlock *PHIBlock,
334 const SmallPtrSet<BasicBlock *, 8> &Incomings,
335 SmallVector<BasicBlock *> &UndefBlks) const;
336
337 void mergeIfCompatible(EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A,
338 PHINode *B);
339
340 void setPhiValues();
341
342 void simplifyAffectedPhis();
343
344 void simplifyHoistedPhis();
345
346 DebugLoc killTerminator(BasicBlock *BB);
347
348 void changeExit(RegionNode *Node, BasicBlock *NewExit,
349 bool IncludeDominator);
350
351 BasicBlock *getNextFlow(BasicBlock *Dominator);
352
353 std::pair<BasicBlock *, DebugLoc> needPrefix(bool NeedEmpty);
354
355 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
356
357 void setPrevNode(BasicBlock *BB);
358
359 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
360
361 bool isPredictableTrue(RegionNode *Node);
362
363 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
364
365 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
366
367 void createFlow();
368
369 void rebuildSSA();
370
371public:
372 void init(Region *R);
373 bool run(Region *R, DominatorTree *DT, const TargetTransformInfo *TTI);
374 bool makeUniformRegion(Region *R, UniformityInfo &UA);
375};
376
377class StructurizeCFGLegacyPass : public RegionPass {
378 bool SkipUniformRegions;
379
380public:
381 static char ID;
382
383 explicit StructurizeCFGLegacyPass(bool SkipUniformRegions_ = false)
384 : RegionPass(ID), SkipUniformRegions(SkipUniformRegions_) {
385 if (ForceSkipUniformRegions.getNumOccurrences())
386 SkipUniformRegions = ForceSkipUniformRegions.getValue();
388 }
389
390 bool runOnRegion(Region *R, RGPassManager &RGM) override {
391 StructurizeCFG SCFG;
392 SCFG.init(R);
393 if (SkipUniformRegions) {
394 UniformityInfo &UA =
395 getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
396 if (SCFG.makeUniformRegion(R, UA))
397 return false;
398 }
399 Function *F = R->getEntry()->getParent();
400 const TargetTransformInfo *TTI =
401 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(*F);
402 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
403 return SCFG.run(R, DT, TTI);
404 }
405
406 StringRef getPassName() const override { return "Structurize control flow"; }
407
408 void getAnalysisUsage(AnalysisUsage &AU) const override {
409 if (SkipUniformRegions)
414
417 }
418};
419
420} // end anonymous namespace
421
422char StructurizeCFGLegacyPass::ID = 0;
423
424INITIALIZE_PASS_BEGIN(StructurizeCFGLegacyPass, "structurizecfg",
425 "Structurize the CFG", false, false)
429INITIALIZE_PASS_END(StructurizeCFGLegacyPass, "structurizecfg",
430 "Structurize the CFG", false, false)
431
432/// Checks whether an instruction is zero cost instruction and checks if the
433/// operands are from different BB. If so, this instruction can be coalesced
434/// if its hoisted to predecessor block. So, this returns true.
435bool StructurizeCFG::isHoistableInstruction(Instruction *I, BasicBlock *BB,
436 BasicBlock *HoistTo) {
437 if (I->getParent() != BB || isa<PHINode>(I))
438 return false;
439
440 // If the instruction is not a zero cost instruction, return false.
441 auto Cost = TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
443 Cost.isValid()
444 ? Cost.getValue()
446 if (CostVal != 0)
447 return false;
448
449 // Check if all operands are available at the hoisting destination.
450 for (auto &Op : I->operands()) {
451 if (auto *OpI = dyn_cast<Instruction>(Op)) {
452 // Operand must dominate the hoisting destination.
453 if (!DT->dominates(OpI->getParent(), HoistTo))
454 return false;
455 }
456 }
457
458 return true;
459}
460
461/// Structurization can introduce unnecessary VGPR copies due to register
462/// coalescing interference. For example, if the Else block has a zero-cost
463/// instruction and the Then block modifies the VGPR value, only one value is
464/// live at a time in merge block before structurization. After structurization,
465/// the coalescer may incorrectly treat the Then value as live in the Else block
466/// (via the path Then → Flow → Else), leading to unnecessary VGPR copies.
467///
468/// This function examines phi nodes whose incoming values are zero-cost
469/// instructions in the Else block. It identifies such values that can be safely
470/// hoisted and moves them to the nearest common dominator of Then and Else
471/// blocks. A follow-up function after setting PhiNodes assigns the hoisted
472/// value to poison phi nodes along the if→flow edge, aiding register coalescing
473/// and minimizing unnecessary live ranges.
474void StructurizeCFG::hoistZeroCostElseBlockPhiValues(BasicBlock *ElseBB,
475 BasicBlock *ThenBB) {
476
477 BasicBlock *ElseSucc = ElseBB->getSingleSuccessor();
478 BasicBlock *CommonDominator = DT->findNearestCommonDominator(ElseBB, ThenBB);
479
480 if (!ElseSucc || !CommonDominator)
481 return;
482 Instruction *Term = CommonDominator->getTerminator();
483 for (PHINode &Phi : ElseSucc->phis()) {
484 Value *ElseVal = Phi.getIncomingValueForBlock(ElseBB);
485 auto *Inst = dyn_cast<Instruction>(ElseVal);
486 if (!Inst || !isHoistableInstruction(Inst, ElseBB, CommonDominator))
487 continue;
488 Inst->removeFromParent();
489 Inst->insertInto(CommonDominator, Term->getIterator());
490 HoistedValues[Inst] = CommonDominator;
491 }
492}
493
494/// Build up the general order of nodes, by performing a topological sort of the
495/// parent region's nodes, while ensuring that there is no outer cycle node
496/// between any two inner cycle nodes.
497void StructurizeCFG::orderNodes() {
498 Order.resize(std::distance(GraphTraits<Region *>::nodes_begin(ParentRegion),
499 GraphTraits<Region *>::nodes_end(ParentRegion)));
500 if (Order.empty())
501 return;
502
503 SmallDenseSet<RegionNode *> Nodes;
504 auto EntryNode = SubGraphTraits::getEntryNode(ParentRegion);
505
506 // A list of range indices of SCCs in Order, to be processed.
508 unsigned I = 0, E = Order.size();
509 while (true) {
510 // Run through all the SCCs in the subgraph starting with Entry.
511 for (auto SCCI =
513 EntryNode);
514 !SCCI.isAtEnd(); ++SCCI) {
515 auto &SCC = *SCCI;
516
517 // An SCC up to the size of 2, can be reduced to an entry (the last node),
518 // and a possible additional node. Therefore, it is already in order, and
519 // there is no need to add it to the work-list.
520 unsigned Size = SCC.size();
521 if (Size > 2)
522 WorkList.emplace_back(I, I + Size);
523
524 // Add the SCC nodes to the Order array.
525 for (const auto &N : SCC) {
526 assert(I < E && "SCC size mismatch!");
527 Order[I++] = N.first;
528 }
529 }
530 assert(I == E && "SCC size mismatch!");
531
532 // If there are no more SCCs to order, then we are done.
533 if (WorkList.empty())
534 break;
535
536 std::tie(I, E) = WorkList.pop_back_val();
537
538 // Collect the set of nodes in the SCC's subgraph. These are only the
539 // possible child nodes; we do not add the entry (last node) otherwise we
540 // will have the same exact SCC all over again.
541 Nodes.clear();
542 Nodes.insert(Order.begin() + I, Order.begin() + E - 1);
543
544 // Update the entry node.
545 EntryNode.first = Order[E - 1];
546 EntryNode.second = &Nodes;
547 }
548}
549
550/// Determine the end of the loops
551void StructurizeCFG::analyzeLoops(RegionNode *N) {
552 if (N->isSubRegion()) {
553 // Test for exit as back edge
554 BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
555 if (Visited.count(Exit))
556 Loops[Exit] = N->getEntry();
557
558 } else {
559 // Test for successors as back edge
560 BasicBlock *BB = N->getNodeAs<BasicBlock>();
561 BranchInst *Term = cast<BranchInst>(BB->getTerminator());
562
563 for (BasicBlock *Succ : Term->successors())
564 if (Visited.count(Succ))
565 Loops[Succ] = BB;
566 }
567}
568
569/// Build the condition for one edge
570PredInfo StructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
571 bool Invert) {
572 Value *Cond = Invert ? BoolFalse : BoolTrue;
573 MaybeCondBranchWeights Weights;
574
575 if (Term->isConditional()) {
576 Cond = Term->getCondition();
577 Weights = CondBranchWeights::tryParse(*Term);
578
579 if (Idx != (unsigned)Invert) {
581 if (Weights)
582 Weights = Weights->invert();
583 }
584 }
585 return {Cond, Weights};
586}
587
588/// Analyze the predecessors of each block and build up predicates
589void StructurizeCFG::gatherPredicates(RegionNode *N) {
590 RegionInfo *RI = ParentRegion->getRegionInfo();
591 BasicBlock *BB = N->getEntry();
592 BBPredicates &Pred = Predicates[BB];
593 BBPredicates &LPred = LoopPreds[BB];
594
595 for (BasicBlock *P : predecessors(BB)) {
596 // Ignore it if it's a branch from outside into our region entry
597 if (!ParentRegion->contains(P))
598 continue;
599
600 Region *R = RI->getRegionFor(P);
601 if (R == ParentRegion) {
602 // It's a top level block in our region
603 BranchInst *Term = cast<BranchInst>(P->getTerminator());
604 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
605 BasicBlock *Succ = Term->getSuccessor(i);
606 if (Succ != BB)
607 continue;
608
609 if (Visited.count(P)) {
610 // Normal forward edge
611 if (Term->isConditional()) {
612 // Try to treat it like an ELSE block
613 BasicBlock *Other = Term->getSuccessor(!i);
614 if (Visited.count(Other) && !Loops.count(Other) &&
615 !Pred.count(Other) && !Pred.count(P)) {
616 hoistZeroCostElseBlockPhiValues(Succ, Other);
617 Pred[Other] = {BoolFalse, std::nullopt};
618 Pred[P] = {BoolTrue, std::nullopt};
619 continue;
620 }
621 }
622 Pred[P] = buildCondition(Term, i, false);
623 } else {
624 // Back edge
625 LPred[P] = buildCondition(Term, i, true);
626 }
627 }
628 } else {
629 // It's an exit from a sub region
630 while (R->getParent() != ParentRegion)
631 R = R->getParent();
632
633 // Edge from inside a subregion to its entry, ignore it
634 if (*R == *N)
635 continue;
636
637 BasicBlock *Entry = R->getEntry();
638 if (Visited.count(Entry))
639 Pred[Entry] = {BoolTrue, std::nullopt};
640 else
641 LPred[Entry] = {BoolFalse, std::nullopt};
642 }
643 }
644}
645
646/// Collect various loop and predicate infos
647void StructurizeCFG::collectInfos() {
648 // Reset predicate
649 Predicates.clear();
650
651 // and loop infos
652 Loops.clear();
653 LoopPreds.clear();
654
655 // Reset the visited nodes
656 Visited.clear();
657
658 for (RegionNode *RN : reverse(Order)) {
659 LLVM_DEBUG(dbgs() << "Visiting: "
660 << (RN->isSubRegion() ? "SubRegion with entry: " : "")
661 << RN->getEntry()->getName() << "\n");
662
663 // Analyze all the conditions leading to a node
664 gatherPredicates(RN);
665
666 // Remember that we've seen this node
667 Visited.insert(RN->getEntry());
668
669 // Find the last back edges
670 analyzeLoops(RN);
671 }
672}
673
674/// Insert the missing branch conditions
675void StructurizeCFG::insertConditions(bool Loops, SSAUpdaterBulk &PhiInserter) {
676 BranchVector &Conds = Loops ? LoopConds : Conditions;
677 Value *Default = Loops ? BoolTrue : BoolFalse;
678
679 for (BranchInst *Term : Conds) {
680 assert(Term->isConditional());
681
682 BasicBlock *Parent = Term->getParent();
683 BasicBlock *SuccTrue = Term->getSuccessor(0);
684 BasicBlock *SuccFalse = Term->getSuccessor(1);
685
686 unsigned Variable = PhiInserter.AddVariable("", Boolean);
687 PhiInserter.AddAvailableValue(Variable, Loops ? SuccFalse : Parent,
688 Default);
689
690 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
691
692 NearestCommonDominator Dominator(DT);
693 Dominator.addBlock(Parent);
694
695 PredInfo ParentInfo{nullptr, std::nullopt};
696 for (auto [BB, PI] : Preds) {
697 if (BB == Parent) {
698 ParentInfo = PI;
699 break;
700 }
701 PhiInserter.AddAvailableValue(Variable, BB, PI.Pred);
702 Dominator.addAndRememberBlock(BB);
703 }
704
705 if (ParentInfo.Pred) {
706 Term->setCondition(ParentInfo.Pred);
707 CondBranchWeights::setMetadata(*Term, ParentInfo.Weights);
708 } else {
709 if (!Dominator.resultIsRememberedBlock())
710 PhiInserter.AddAvailableValue(Variable, Dominator.result(), Default);
711
712 PhiInserter.AddUse(Variable, &Term->getOperandUse(0));
713 }
714 }
715}
716
717/// Simplify any inverted conditions that were built by buildConditions.
718void StructurizeCFG::simplifyConditions() {
719 SmallVector<Instruction *> InstToErase;
720 for (auto &I : concat<PredMap::value_type>(Predicates, LoopPreds)) {
721 auto &Preds = I.second;
722 for (auto [BB, PI] : Preds) {
723 Instruction *Inverted;
724 if (match(PI.Pred, m_Not(m_OneUse(m_Instruction(Inverted)))) &&
725 !PI.Pred->use_empty()) {
726 if (auto *InvertedCmp = dyn_cast<CmpInst>(Inverted)) {
727 InvertedCmp->setPredicate(InvertedCmp->getInversePredicate());
728 PI.Pred->replaceAllUsesWith(InvertedCmp);
729 InstToErase.push_back(cast<Instruction>(PI.Pred));
730 }
731 }
732 }
733 }
734 for (auto *I : InstToErase)
735 I->eraseFromParent();
736}
737
738/// Remove all PHI values coming from "From" into "To" and remember
739/// them in DeletedPhis
740void StructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
741 PhiMap &Map = DeletedPhis[To];
742 for (PHINode &Phi : To->phis()) {
743 bool Recorded = false;
744 while (Phi.getBasicBlockIndex(From) != -1) {
745 Value *Deleted = Phi.removeIncomingValue(From, false);
746 Map[&Phi].push_back(std::make_pair(From, Deleted));
747 if (!Recorded) {
748 AffectedPhis.push_back(&Phi);
749 Recorded = true;
750 }
751 }
752 }
753}
754
755/// Add a dummy PHI value as soon as we knew the new predecessor
756void StructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
757 for (PHINode &Phi : To->phis()) {
758 Value *Poison = PoisonValue::get(Phi.getType());
759 Phi.addIncoming(Poison, From);
760 }
761 AddedPhis[To].push_back(From);
762}
763
764/// When we are reconstructing a PHI inside \p PHIBlock with incoming values
765/// from predecessors \p Incomings, we have a chance to mark the available value
766/// from some blocks as undefined. The function will find out all such blocks
767/// and return in \p UndefBlks.
768void StructurizeCFG::findUndefBlocks(
769 BasicBlock *PHIBlock, const SmallPtrSet<BasicBlock *, 8> &Incomings,
770 SmallVector<BasicBlock *> &UndefBlks) const {
771 // We may get a post-structured CFG like below:
772 //
773 // | P1
774 // |/
775 // F1
776 // |\
777 // | N
778 // |/
779 // F2
780 // |\
781 // | P2
782 // |/
783 // F3
784 // |\
785 // B
786 //
787 // B is the block that has a PHI being reconstructed. P1/P2 are predecessors
788 // of B before structurization. F1/F2/F3 are flow blocks inserted during
789 // structurization process. Block N is not a predecessor of B before
790 // structurization, but are placed between the predecessors(P1/P2) of B after
791 // structurization. This usually means that threads went to N never take the
792 // path N->F2->F3->B. For example, the threads take the branch F1->N may
793 // always take the branch F2->P2. So, when we are reconstructing a PHI
794 // originally in B, we can safely say the incoming value from N is undefined.
795 SmallPtrSet<BasicBlock *, 8> VisitedBlock;
796 SmallVector<BasicBlock *, 8> Stack;
797 if (PHIBlock == ParentRegion->getExit()) {
798 for (auto P : predecessors(PHIBlock)) {
799 if (ParentRegion->contains(P))
800 Stack.push_back(P);
801 }
802 } else {
803 append_range(Stack, predecessors(PHIBlock));
804 }
805
806 // Do a backward traversal over the CFG, and stop further searching if
807 // the block is not a Flow. If a block is neither flow block nor the
808 // incoming predecessor, then the incoming value from the block is
809 // undefined value for the PHI being reconstructed.
810 while (!Stack.empty()) {
811 BasicBlock *Current = Stack.pop_back_val();
812 if (!VisitedBlock.insert(Current).second)
813 continue;
814
815 if (FlowSet.contains(Current))
816 llvm::append_range(Stack, predecessors(Current));
817 else if (!Incomings.contains(Current))
818 UndefBlks.push_back(Current);
819 }
820}
821
822// If two phi nodes have compatible incoming values (for each
823// incoming block, either they have the same incoming value or only one phi
824// node has an incoming value), let them share the merged incoming values. The
825// merge process is guided by the equivalence information from \p PhiClasses.
826// The function will possibly update the incoming values of leader phi in
827// DeletedPhis.
828void StructurizeCFG::mergeIfCompatible(
829 EquivalenceClasses<PHINode *> &PhiClasses, PHINode *A, PHINode *B) {
830 auto ItA = PhiClasses.findLeader(PhiClasses.insert(A));
831 auto ItB = PhiClasses.findLeader(PhiClasses.insert(B));
832 // They are already in the same class, no work needed.
833 if (ItA == ItB)
834 return;
835
836 PHINode *LeaderA = *ItA;
837 PHINode *LeaderB = *ItB;
838 BBValueVector &IncomingA = DeletedPhis[LeaderA->getParent()][LeaderA];
839 BBValueVector &IncomingB = DeletedPhis[LeaderB->getParent()][LeaderB];
840
841 DenseMap<BasicBlock *, Value *> Mergeable(IncomingA.begin(), IncomingA.end());
842 for (auto [BB, V] : IncomingB) {
843 auto BBIt = Mergeable.find(BB);
844 if (BBIt != Mergeable.end() && BBIt->second != V)
845 return;
846 // Either IncomingA does not have this value or IncomingA has the same
847 // value.
848 Mergeable.insert({BB, V});
849 }
850
851 // Update the incoming value of leaderA.
852 IncomingA.assign(Mergeable.begin(), Mergeable.end());
853 PhiClasses.unionSets(ItA, ItB);
854}
855
856/// Add the real PHI value as soon as everything is set up
857void StructurizeCFG::setPhiValues() {
858 SmallVector<PHINode *, 8> InsertedPhis;
859 SSAUpdater Updater(&InsertedPhis);
860 DenseMap<BasicBlock *, SmallVector<BasicBlock *>> UndefBlksMap;
861
862 // Find phi nodes that have compatible incoming values (either they have
863 // the same value for the same block or only one phi node has an incoming
864 // value, see example below). We only search again the phi's that are
865 // referenced by another phi, which is the case we care about.
866 //
867 // For example (-- means no incoming value):
868 // phi1 : BB1:phi2 BB2:v BB3:--
869 // phi2: BB1:-- BB2:v BB3:w
870 //
871 // Then we can merge these incoming values and let phi1, phi2 use the
872 // same set of incoming values:
873 //
874 // phi1&phi2: BB1:phi2 BB2:v BB3:w
875 //
876 // By doing this, phi1 and phi2 would share more intermediate phi nodes.
877 // This would help reduce the number of phi nodes during SSA reconstruction
878 // and ultimately result in fewer COPY instructions.
879 //
880 // This should be correct, because if a phi node does not have incoming
881 // value from certain block, this means the block is not the predecessor
882 // of the parent block, so we actually don't care about its incoming value.
883 EquivalenceClasses<PHINode *> PhiClasses;
884 for (const auto &[To, From] : AddedPhis) {
885 auto OldPhiIt = DeletedPhis.find(To);
886 if (OldPhiIt == DeletedPhis.end())
887 continue;
888
889 PhiMap &BlkPhis = OldPhiIt->second;
890 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
891 SmallPtrSet<BasicBlock *, 8> Incomings;
892
893 // Get the undefined blocks shared by all the phi nodes.
894 if (!BlkPhis.empty()) {
895 Incomings.insert_range(llvm::make_first_range(BlkPhis.front().second));
896 findUndefBlocks(To, Incomings, UndefBlks);
897 }
898
899 for (const auto &[Phi, Incomings] : OldPhiIt->second) {
900 SmallVector<PHINode *> IncomingPHIs;
901 for (const auto &[BB, V] : Incomings) {
902 // First, for each phi, check whether it has incoming value which is
903 // another phi.
904 if (PHINode *P = dyn_cast<PHINode>(V))
905 IncomingPHIs.push_back(P);
906 }
907
908 for (auto *OtherPhi : IncomingPHIs) {
909 // Skip phis that are unrelated to the phi reconstruction for now.
910 if (!DeletedPhis.contains(OtherPhi->getParent()))
911 continue;
912 mergeIfCompatible(PhiClasses, Phi, OtherPhi);
913 }
914 }
915 }
916
917 for (const auto &AddedPhi : AddedPhis) {
918 BasicBlock *To = AddedPhi.first;
919 const BBVector &From = AddedPhi.second;
920
921 auto It = DeletedPhis.find(To);
922 if (It == DeletedPhis.end())
923 continue;
924
925 PhiMap &Map = It->second;
926 SmallVector<BasicBlock *> &UndefBlks = UndefBlksMap[To];
927 for (const auto &[Phi, Incoming] : Map) {
928 Value *Poison = PoisonValue::get(Phi->getType());
929 Updater.Initialize(Phi->getType(), "");
930 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
931 Updater.AddAvailableValue(To, Poison);
932
933 // Use leader phi's incoming if there is.
934 auto LeaderIt = PhiClasses.findLeader(Phi);
935 bool UseIncomingOfLeader =
936 LeaderIt != PhiClasses.member_end() && *LeaderIt != Phi;
937 const auto &IncomingMap =
938 UseIncomingOfLeader ? DeletedPhis[(*LeaderIt)->getParent()][*LeaderIt]
939 : Incoming;
940
941 SmallVector<BasicBlock *> ConstantPreds;
942 for (const auto &[BB, V] : IncomingMap) {
943 Updater.AddAvailableValue(BB, V);
944 if (isa<Constant>(V))
945 ConstantPreds.push_back(BB);
946 }
947
948 for (auto UB : UndefBlks) {
949 // If this undef block is dominated by any predecessor(before
950 // structurization) of reconstructed PHI with constant incoming value,
951 // don't mark the available value as undefined. Setting undef to such
952 // block will stop us from getting optimal phi insertion.
953 if (any_of(ConstantPreds,
954 [&](BasicBlock *CP) { return DT->dominates(CP, UB); }))
955 continue;
956 // Maybe already get a value through sharing with other phi nodes.
957 if (Updater.HasValueForBlock(UB))
958 continue;
959
960 Updater.AddAvailableValue(UB, Poison);
961 }
962
963 for (BasicBlock *FI : From)
964 Phi->setIncomingValueForBlock(FI, Updater.GetValueAtEndOfBlock(FI));
965 AffectedPhis.push_back(Phi);
966 }
967 }
968
969 AffectedPhis.append(InsertedPhis.begin(), InsertedPhis.end());
970}
971
972/// Updates PHI nodes after hoisted zero cost instructions by replacing poison
973/// entries on Flow nodes with the appropriate hoisted values
974void StructurizeCFG::simplifyHoistedPhis() {
975 for (WeakVH VH : AffectedPhis) {
976 PHINode *Phi = dyn_cast_or_null<PHINode>(VH);
977 if (!Phi || Phi->getNumIncomingValues() != 2)
978 continue;
979
980 for (int i = 0; i < 2; i++) {
981 Value *V = Phi->getIncomingValue(i);
982 auto BBIt = HoistedValues.find(V);
983
984 if (BBIt == HoistedValues.end())
985 continue;
986
987 Value *OtherV = Phi->getIncomingValue(!i);
988 PHINode *OtherPhi = dyn_cast<PHINode>(OtherV);
989 if (!OtherPhi)
990 continue;
991
992 int PoisonValBBIdx = -1;
993 for (size_t i = 0; i < OtherPhi->getNumIncomingValues(); i++) {
994 if (!isa<PoisonValue>(OtherPhi->getIncomingValue(i)))
995 continue;
996 PoisonValBBIdx = i;
997 break;
998 }
999 if (PoisonValBBIdx == -1 ||
1000 !DT->dominates(BBIt->second,
1001 OtherPhi->getIncomingBlock(PoisonValBBIdx)))
1002 continue;
1003
1004 OtherPhi->setIncomingValue(PoisonValBBIdx, V);
1005 if (DT->dominates(OtherV, Phi))
1006 Phi->setIncomingValue(i, OtherV);
1007 }
1008 }
1009}
1010
1011void StructurizeCFG::simplifyAffectedPhis() {
1012 bool Changed;
1013 do {
1014 Changed = false;
1015 SimplifyQuery Q(Func->getDataLayout());
1016 Q.DT = DT;
1017 // Setting CanUseUndef to true might extend value liveness, set it to false
1018 // to achieve better register pressure.
1019 Q.CanUseUndef = false;
1020 for (WeakVH VH : AffectedPhis) {
1021 if (auto Phi = dyn_cast_or_null<PHINode>(VH)) {
1022 if (auto NewValue = simplifyInstruction(Phi, Q)) {
1023 Phi->replaceAllUsesWith(NewValue);
1024 Phi->eraseFromParent();
1025 Changed = true;
1026 }
1027 }
1028 }
1029 } while (Changed);
1030}
1031
1032/// Remove phi values from all successors and then remove the terminator.
1033DebugLoc StructurizeCFG::killTerminator(BasicBlock *BB) {
1035 if (!Term)
1036 return DebugLoc();
1037
1038 for (BasicBlock *Succ : successors(BB))
1039 delPhiValues(BB, Succ);
1040
1041 DebugLoc DL = Term->getDebugLoc();
1042 Term->eraseFromParent();
1043 return DL;
1044}
1045
1046/// Let node exit(s) point to NewExit
1047void StructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
1048 bool IncludeDominator) {
1049 if (Node->isSubRegion()) {
1050 Region *SubRegion = Node->getNodeAs<Region>();
1051 BasicBlock *OldExit = SubRegion->getExit();
1052 BasicBlock *Dominator = nullptr;
1053
1054 // Find all the edges from the sub region to the exit.
1055 // We use make_early_inc_range here because we modify BB's terminator.
1056 for (BasicBlock *BB : llvm::make_early_inc_range(predecessors(OldExit))) {
1057 if (!SubRegion->contains(BB))
1058 continue;
1059
1060 // Modify the edges to point to the new exit
1061 delPhiValues(BB, OldExit);
1062 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
1063 addPhiValues(BB, NewExit);
1064
1065 // Find the new dominator (if requested)
1066 if (IncludeDominator) {
1067 if (!Dominator)
1068 Dominator = BB;
1069 else
1070 Dominator = DT->findNearestCommonDominator(Dominator, BB);
1071 }
1072 }
1073
1074 // Change the dominator (if requested)
1075 if (Dominator)
1076 DT->changeImmediateDominator(NewExit, Dominator);
1077
1078 // Update the region info
1079 SubRegion->replaceExit(NewExit);
1080 } else {
1081 BasicBlock *BB = Node->getNodeAs<BasicBlock>();
1082 DebugLoc DL = killTerminator(BB);
1083 BranchInst *Br = BranchInst::Create(NewExit, BB);
1084 Br->setDebugLoc(DL);
1085 addPhiValues(BB, NewExit);
1086 if (IncludeDominator)
1087 DT->changeImmediateDominator(NewExit, BB);
1088 }
1089}
1090
1091/// Create a new flow node and update dominator tree and region info
1092BasicBlock *StructurizeCFG::getNextFlow(BasicBlock *Dominator) {
1093 LLVMContext &Context = Func->getContext();
1094 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
1095 Order.back()->getEntry();
1097 Func, Insert);
1098 FlowSet.insert(Flow);
1099 DT->addNewBlock(Flow, Dominator);
1100 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
1101 return Flow;
1102}
1103
1104/// Create a new or reuse the previous node as flow node. Returns a block and a
1105/// debug location to be used for new instructions in that block.
1106std::pair<BasicBlock *, DebugLoc> StructurizeCFG::needPrefix(bool NeedEmpty) {
1107 BasicBlock *Entry = PrevNode->getEntry();
1108
1109 if (!PrevNode->isSubRegion()) {
1110 DebugLoc DL = killTerminator(Entry);
1111 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
1112 return {Entry, DL};
1113 }
1114
1115 // create a new flow node
1116 BasicBlock *Flow = getNextFlow(Entry);
1117
1118 // and wire it up
1119 changeExit(PrevNode, Flow, true);
1120 PrevNode = ParentRegion->getBBNode(Flow);
1121 return {Flow, DebugLoc()};
1122}
1123
1124/// Returns the region exit if possible, otherwise just a new flow node
1125BasicBlock *StructurizeCFG::needPostfix(BasicBlock *Flow,
1126 bool ExitUseAllowed) {
1127 if (!Order.empty() || !ExitUseAllowed)
1128 return getNextFlow(Flow);
1129
1130 BasicBlock *Exit = ParentRegion->getExit();
1131 DT->changeImmediateDominator(Exit, Flow);
1132 addPhiValues(Flow, Exit);
1133 return Exit;
1134}
1135
1136/// Set the previous node
1137void StructurizeCFG::setPrevNode(BasicBlock *BB) {
1138 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB)
1139 : nullptr;
1140}
1141
1142/// Does BB dominate all the predicates of Node?
1143bool StructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
1144 BBPredicates &Preds = Predicates[Node->getEntry()];
1145 return llvm::all_of(Preds, [&](std::pair<BasicBlock *, PredInfo> Pred) {
1146 return DT->dominates(BB, Pred.first);
1147 });
1148}
1149
1150/// Can we predict that this node will always be called?
1151bool StructurizeCFG::isPredictableTrue(RegionNode *Node) {
1152 BBPredicates &Preds = Predicates[Node->getEntry()];
1153 bool Dominated = false;
1154
1155 // Regionentry is always true
1156 if (!PrevNode)
1157 return true;
1158
1159 for (auto [BB, PI] : Preds) {
1160 if (PI.Pred != BoolTrue)
1161 return false;
1162
1163 if (!Dominated && DT->dominates(BB, PrevNode->getEntry()))
1164 Dominated = true;
1165 }
1166
1167 // TODO: The dominator check is too strict
1168 return Dominated;
1169}
1170
1171/// Take one node from the order vector and wire it up
1172void StructurizeCFG::wireFlow(bool ExitUseAllowed,
1173 BasicBlock *LoopEnd) {
1174 RegionNode *Node = Order.pop_back_val();
1175 Visited.insert(Node->getEntry());
1176
1177 if (isPredictableTrue(Node)) {
1178 // Just a linear flow
1179 if (PrevNode) {
1180 changeExit(PrevNode, Node->getEntry(), true);
1181 }
1182 PrevNode = Node;
1183 } else {
1184 // Insert extra prefix node (or reuse last one)
1185 auto [Flow, DL] = needPrefix(false);
1186
1187 // Insert extra postfix node (or use exit instead)
1188 BasicBlock *Entry = Node->getEntry();
1189 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
1190
1191 // let it point to entry and next block
1192 BranchInst *Br = BranchInst::Create(Entry, Next, BoolPoison, Flow);
1193 Br->setDebugLoc(DL);
1194 Conditions.push_back(Br);
1195 addPhiValues(Flow, Entry);
1196 DT->changeImmediateDominator(Entry, Flow);
1197
1198 PrevNode = Node;
1199 while (!Order.empty() && !Visited.count(LoopEnd) &&
1200 dominatesPredicates(Entry, Order.back())) {
1201 handleLoops(false, LoopEnd);
1202 }
1203
1204 changeExit(PrevNode, Next, false);
1205 setPrevNode(Next);
1206 }
1207}
1208
1209void StructurizeCFG::handleLoops(bool ExitUseAllowed,
1210 BasicBlock *LoopEnd) {
1211 RegionNode *Node = Order.back();
1212 BasicBlock *LoopStart = Node->getEntry();
1213
1214 if (!Loops.count(LoopStart)) {
1215 wireFlow(ExitUseAllowed, LoopEnd);
1216 return;
1217 }
1218
1219 if (!isPredictableTrue(Node))
1220 LoopStart = needPrefix(true).first;
1221
1222 LoopEnd = Loops[Node->getEntry()];
1223 wireFlow(false, LoopEnd);
1224 while (!Visited.count(LoopEnd)) {
1225 handleLoops(false, LoopEnd);
1226 }
1227
1228 assert(LoopStart != &LoopStart->getParent()->getEntryBlock());
1229
1230 // Create an extra loop end node
1231 DebugLoc DL;
1232 std::tie(LoopEnd, DL) = needPrefix(false);
1233 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
1234 BranchInst *Br = BranchInst::Create(Next, LoopStart, BoolPoison, LoopEnd);
1235 Br->setDebugLoc(DL);
1236 LoopConds.push_back(Br);
1237 addPhiValues(LoopEnd, LoopStart);
1238 setPrevNode(Next);
1239}
1240
1241/// After this function control flow looks like it should be, but
1242/// branches and PHI nodes only have undefined conditions.
1243void StructurizeCFG::createFlow() {
1244 BasicBlock *Exit = ParentRegion->getExit();
1245 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
1246
1247 AffectedPhis.clear();
1248 DeletedPhis.clear();
1249 AddedPhis.clear();
1250 Conditions.clear();
1251 LoopConds.clear();
1252
1253 PrevNode = nullptr;
1254 Visited.clear();
1255
1256 while (!Order.empty()) {
1257 handleLoops(EntryDominatesExit, nullptr);
1258 }
1259
1260 if (PrevNode)
1261 changeExit(PrevNode, Exit, EntryDominatesExit);
1262 else
1263 assert(EntryDominatesExit);
1264}
1265
1266/// Handle a rare case where the disintegrated nodes instructions
1267/// no longer dominate all their uses. Not sure if this is really necessary
1268void StructurizeCFG::rebuildSSA() {
1269 SSAUpdater Updater;
1270 for (BasicBlock *BB : ParentRegion->blocks())
1271 for (Instruction &I : *BB) {
1272 bool Initialized = false;
1273 // We may modify the use list as we iterate over it, so we use
1274 // make_early_inc_range.
1275 for (Use &U : llvm::make_early_inc_range(I.uses())) {
1276 Instruction *User = cast<Instruction>(U.getUser());
1277 if (User->getParent() == BB) {
1278 continue;
1279 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
1280 if (UserPN->getIncomingBlock(U) == BB)
1281 continue;
1282 }
1283
1284 if (DT->dominates(&I, User))
1285 continue;
1286
1287 if (!Initialized) {
1288 Value *Poison = PoisonValue::get(I.getType());
1289 Updater.Initialize(I.getType(), "");
1290 Updater.AddAvailableValue(&Func->getEntryBlock(), Poison);
1291 Updater.AddAvailableValue(BB, &I);
1292 Initialized = true;
1293 }
1294 Updater.RewriteUseAfterInsertions(U);
1295 }
1296 }
1297}
1298
1299static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID,
1300 const UniformityInfo &UA) {
1301 // Bool for if all sub-regions are uniform.
1302 bool SubRegionsAreUniform = true;
1303 // Count of how many direct children are conditional.
1304 unsigned ConditionalDirectChildren = 0;
1305
1306 for (auto *E : R->elements()) {
1307 if (!E->isSubRegion()) {
1308 auto Br = dyn_cast<BranchInst>(E->getEntry()->getTerminator());
1309 if (!Br || !Br->isConditional())
1310 continue;
1311
1312 if (!UA.isUniform(Br))
1313 return false;
1314
1315 // One of our direct children is conditional.
1316 ConditionalDirectChildren++;
1317
1318 LLVM_DEBUG(dbgs() << "BB: " << Br->getParent()->getName()
1319 << " has uniform terminator\n");
1320 } else {
1321 // Explicitly refuse to treat regions as uniform if they have non-uniform
1322 // subregions. We cannot rely on UniformityAnalysis for branches in
1323 // subregions because those branches may have been removed and re-created,
1324 // so we look for our metadata instead.
1325 //
1326 // Warning: It would be nice to treat regions as uniform based only on
1327 // their direct child basic blocks' terminators, regardless of whether
1328 // subregions are uniform or not. However, this requires a very careful
1329 // look at SIAnnotateControlFlow to make sure nothing breaks there.
1330 for (auto *BB : E->getNodeAs<Region>()->blocks()) {
1331 auto Br = dyn_cast<BranchInst>(BB->getTerminator());
1332 if (!Br || !Br->isConditional())
1333 continue;
1334
1335 if (!Br->getMetadata(UniformMDKindID)) {
1336 // Early exit if we cannot have relaxed uniform regions.
1337 if (!RelaxedUniformRegions)
1338 return false;
1339
1340 SubRegionsAreUniform = false;
1341 break;
1342 }
1343 }
1344 }
1345 }
1346
1347 // Our region is uniform if:
1348 // 1. All conditional branches that are direct children are uniform (checked
1349 // above).
1350 // 2. And either:
1351 // a. All sub-regions are uniform.
1352 // b. There is one or less conditional branches among the direct children.
1353 return SubRegionsAreUniform || (ConditionalDirectChildren <= 1);
1354}
1355
1356void StructurizeCFG::init(Region *R) {
1357 LLVMContext &Context = R->getEntry()->getContext();
1358
1359 Boolean = Type::getInt1Ty(Context);
1360 BoolTrue = ConstantInt::getTrue(Context);
1361 BoolFalse = ConstantInt::getFalse(Context);
1362 BoolPoison = PoisonValue::get(Boolean);
1363
1364 this->UA = nullptr;
1365}
1366
1367bool StructurizeCFG::makeUniformRegion(Region *R, UniformityInfo &UA) {
1368 if (R->isTopLevelRegion())
1369 return false;
1370
1371 this->UA = &UA;
1372
1373 // TODO: We could probably be smarter here with how we handle sub-regions.
1374 // We currently rely on the fact that metadata is set by earlier invocations
1375 // of the pass on sub-regions, and that this metadata doesn't get lost --
1376 // but we shouldn't rely on metadata for correctness!
1377 unsigned UniformMDKindID =
1378 R->getEntry()->getContext().getMDKindID("structurizecfg.uniform");
1379
1380 if (hasOnlyUniformBranches(R, UniformMDKindID, UA)) {
1381 LLVM_DEBUG(dbgs() << "Skipping region with uniform control flow: " << *R
1382 << '\n');
1383
1384 // Mark all direct child block terminators as having been treated as
1385 // uniform. To account for a possible future in which non-uniform
1386 // sub-regions are treated more cleverly, indirect children are not
1387 // marked as uniform.
1388 MDNode *MD = MDNode::get(R->getEntry()->getParent()->getContext(), {});
1389 for (RegionNode *E : R->elements()) {
1390 if (E->isSubRegion())
1391 continue;
1392
1393 if (Instruction *Term = E->getEntry()->getTerminator())
1394 Term->setMetadata(UniformMDKindID, MD);
1395 }
1396
1397 return true;
1398 }
1399 return false;
1400}
1401
1402/// Run the transformation for each region found
1403bool StructurizeCFG::run(Region *R, DominatorTree *DT,
1404 const TargetTransformInfo *TTI) {
1405 if (R->isTopLevelRegion())
1406 return false;
1407
1408 this->DT = DT;
1409 this->TTI = TTI;
1410 Func = R->getEntry()->getParent();
1411 assert(hasOnlySimpleTerminator(*Func) && "Unsupported block terminator.");
1412
1413 ParentRegion = R;
1414
1415 orderNodes();
1416 collectInfos();
1417 createFlow();
1418
1419 SSAUpdaterBulk PhiInserter;
1420 insertConditions(false, PhiInserter);
1421 insertConditions(true, PhiInserter);
1422 PhiInserter.RewriteAndOptimizeAllUses(*DT);
1423
1424 setPhiValues();
1425 simplifyHoistedPhis();
1426 simplifyConditions();
1427 simplifyAffectedPhis();
1428 rebuildSSA();
1429
1430 // Cleanup
1431 Order.clear();
1432 Visited.clear();
1433 DeletedPhis.clear();
1434 AddedPhis.clear();
1435 Predicates.clear();
1436 Conditions.clear();
1437 Loops.clear();
1438 LoopPreds.clear();
1439 LoopConds.clear();
1440 FlowSet.clear();
1441
1442 return true;
1443}
1444
1445Pass *llvm::createStructurizeCFGPass(bool SkipUniformRegions) {
1446 return new StructurizeCFGLegacyPass(SkipUniformRegions);
1447}
1448
1449static void addRegionIntoQueue(Region &R, std::vector<Region *> &Regions) {
1450 Regions.push_back(&R);
1451 for (const auto &E : R)
1452 addRegionIntoQueue(*E, Regions);
1453}
1454
1456 : SkipUniformRegions(SkipUniformRegions_) {
1457 if (ForceSkipUniformRegions.getNumOccurrences())
1458 SkipUniformRegions = ForceSkipUniformRegions.getValue();
1459}
1460
1462 raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
1464 OS, MapClassName2PassName);
1465 if (SkipUniformRegions)
1466 OS << "<skip-uniform-regions>";
1467}
1468
1471
1472 bool Changed = false;
1474 auto &RI = AM.getResult<RegionInfoAnalysis>(F);
1476 UniformityInfo *UI = nullptr;
1477 if (SkipUniformRegions)
1479
1480 std::vector<Region *> Regions;
1481 addRegionIntoQueue(*RI.getTopLevelRegion(), Regions);
1482 while (!Regions.empty()) {
1483 Region *R = Regions.back();
1484 Regions.pop_back();
1485
1486 StructurizeCFG SCFG;
1487 SCFG.init(R);
1488
1489 if (SkipUniformRegions && SCFG.makeUniformRegion(R, *UI)) {
1490 Changed = true; // May have added metadata.
1491 continue;
1492 }
1493
1494 Changed |= SCFG.run(R, DT, TTI);
1495 }
1496 if (!Changed)
1497 return PreservedAnalyses::all();
1500 return PA;
1501}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DenseMap< BasicBlock *, Instruction * > BBPredicates
This file defines the DenseMap class.
@ Default
Generic implementation of equivalence classes through the use Tarjan's efficient union-find algorithm...
Hexagon Hardware Loops
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
This file implements a map that provides insertion order iteration.
This file contains the declarations for metadata subclasses.
#define T
#define P(N)
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
const SmallVectorImpl< MachineOperand > & Cond
static void addRegionIntoQueue(Region &R, std::deque< Region * > &RQ)
This builds on the llvm/ADT/GraphTraits.h file to find the strongly connected components (SCCs) of a ...
Annotate SI Control Flow
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
SmallDenseMap< const PHINode *, PhiInfo, 16 > PhiMap
const char FlowBlockName[]
static bool hasOnlyUniformBranches(Region *R, unsigned UniformMDKindID, const UniformityInfo &UA)
#define LLVM_DEBUG(...)
Definition Debug.h:114
This pass exposes codegen information to IR-level passes.
LLVM IR instance of the generic uniformity analysis.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition BasicBlock.h:528
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
LLVM_ABI const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Conditional or Unconditional Branch instruction.
bool isConditional() const
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:109
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition DenseMap.h:163
Analysis pass which computes a DominatorTree.
Definition Dominators.h:284
void changeImmediateDominator(DomTreeNodeBase< NodeT > *N, DomTreeNodeBase< NodeT > *NewIDom)
changeImmediateDominator - This method is used to update the dominator tree information when a node's...
DomTreeNodeBase< NodeT > * addNewBlock(NodeT *BB, NodeT *DomBB)
Add a new node to the dominator tree information.
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:322
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:165
LLVM_ABI Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
const ECValue & insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
member_iterator member_end() const
member_iterator findLeader(const ElemTy &V) const
findLeader - Given a value in the set, return a member iterator for the equivalence class it is in.
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
const BasicBlock & getEntryBlock() const
Definition Function.h:807
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:363
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition Function.cpp:359
bool isUniform(ConstValueRefT V) const
Whether V is uniform/non-divergent.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
This class implements a map that also provides access to all stored values in a deterministic order.
Definition MapVector.h:36
void setIncomingValue(unsigned i, Value *V)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
Definition Pass.h:99
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
The pass manager to schedule RegionPasses.
Definition RegionPass.h:88
void replaceExit(BlockT *BB)
Replace the exit basic block of the region with the new basic block.
block_range blocks()
Returns a range view of the basic blocks in the region.
Definition RegionInfo.h:620
BlockT * getExit() const
Get the exit BasicBlock of the Region.
Definition RegionInfo.h:357
RegionNodeT * getBBNode(BlockT *BB) const
Get the BasicBlock RegionNode for a BasicBlock.
bool contains(const BlockT *BB) const
Check if the region contains a BasicBlock.
RegionInfoT * getRegionInfo() const
Return the RegionInfo object, that belongs to this Region.
Definition RegionInfo.h:424
BlockT * getEntry() const
Get the entry BasicBlock of the Region.
Definition RegionInfo.h:320
Analysis pass that exposes the RegionInfo for a function.
Definition RegionInfo.h:965
RegionT * getRegionFor(BlockT *BB) const
Get the smallest region that contains a BasicBlock.
bool isSubRegion() const
Is this RegionNode a subregion?
Definition RegionInfo.h:186
BlockT * getEntry() const
Get the entry BasicBlock of this RegionNode.
Definition RegionInfo.h:172
A pass that runs on each Region in a function.
Definition RegionPass.h:33
Helper class for SSA formation on a set of values defined in multiple blocks.
LLVM_ABI unsigned AddVariable(StringRef Name, Type *Ty)
Add a new variable to the SSA rewriter.
LLVM_ABI void AddAvailableValue(unsigned Var, BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
LLVM_ABI void AddUse(unsigned Var, Use *U)
Record a use of the symbolic value.
void RewriteAndOptimizeAllUses(DominatorTree &DT)
Rewrite all uses and simplify the inserted PHI nodes.
void RewriteUseAfterInsertions(Use &U)
Rewrite a use like RewriteUse but handling in-block definitions.
void Initialize(Type *Ty, StringRef Name)
Reset this object to get ready for a new set of SSA updates with type 'Ty'.
void AddAvailableValue(BasicBlock *BB, Value *V)
Indicate that a rewritten value is available in the specified block with the specified value.
Implements a dense probed hash-table based set with some number of buckets stored inline.
Definition DenseSet.h:291
void insert_range(Range &&R)
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
bool contains(ConstPtrType Ptr) const
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Analysis pass providing the TargetTransformInfo.
Wrapper pass for TargetTransformInfo.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ TCK_Latency
The latency of instruction.
@ TCC_Expensive
The cost of a 'div' instruction on x86.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
Analysis pass which computes UniformityInfo.
Legacy analysis pass which computes a CycleInfo.
LLVM_ABI bool replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition User.cpp:21
LLVM Value Representation.
Definition Value.h:75
int getNumOccurrences() const
std::pair< iterator, bool > insert(const ValueT &V)
Definition DenseSet.h:202
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
CRTP base class for adapting an iterator to a different type.
Definition iterator.h:237
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
static scc_iterator begin(const GraphT &G)
bool isAtEnd() const
Direct loop termination test which is more efficient than comparison with end().
Changed
@ Entry
Definition COFF.h:862
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
BinaryOp_match< SrcTy, SpecificConstantMatch, TargetOpcode::G_XOR, true > m_Not(const SrcTy &&Src)
Matches a register not-ed by a G_XOR.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
initializer< Ty > init(const Ty &Val)
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
GenericUniformityInfo< SSAContext > UniformityInfo
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
APInt operator*(APInt a, uint64_t RHS)
Definition APInt.h:2235
LLVM_ABI bool hasOnlySimpleTerminator(const Function &F)
auto successors(const MachineBasicBlock *BB)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:632
LLVM_ABI Pass * createStructurizeCFGPass(bool SkipUniformRegions=false)
When SkipUniformRegions is true the structizer will not structurize regions that only contain uniform...
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&...Ranges)
Returns a concatenated range across two or more ranges.
Definition STLExtras.h:1150
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
LLVM_ABI void setBranchWeights(Instruction &I, ArrayRef< uint32_t > Weights, bool IsExpected, bool ElideAllZero=false)
Create a new branch_weights metadata node and add or overwrite a prof metadata reference to instructi...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
auto make_first_range(ContainerTy &&c)
Given a container of pairs, return a range over the first elements.
Definition STLExtras.h:1397
iterator_range< filter_iterator< detail::IterOfRange< RangeT >, PredicateT > > make_filter_range(RangeT &&Range, PredicateT Pred)
Convenience function that takes a range of elements and a predicate, and return a new filter_iterator...
Definition STLExtras.h:550
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
unsigned char Boolean
Definition ConvertUTF.h:132
@ Other
Any other memory.
Definition ModRef.h:68
TargetTransformInfo TTI
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
LLVM_ABI bool extractBranchWeights(const MDNode *ProfileData, SmallVectorImpl< uint32_t > &Weights)
Extract branch weights from MD_prof metadata.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto predecessors(const MachineBasicBlock *BB)
iterator_range< typename GraphTraits< GraphType >::ChildIteratorType > children(const typename GraphTraits< GraphType >::NodeRef &G)
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI Value * invertCondition(Value *Condition)
Invert the given true/false value, possibly reusing an existing copy.
Definition Local.cpp:3955
filter_iterator_impl< WrappedIteratorT, PredicateT, detail::fwd_or_bidi_tag< WrappedIteratorT > > filter_iterator
Defines filter_iterator to a suitable specialization of filter_iterator_impl, based on the underlying...
Definition STLExtras.h:537
LLVM_ABI void initializeStructurizeCFGLegacyPassPass(PassRegistry &)
#define N
A CRTP mix-in to automatically provide informational APIs needed for passes.
Definition PassManager.h:70
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
StructurizeCFGPass(bool SkipUniformRegions=false)