LLVM 20.0.0git
GVNSink.cpp
Go to the documentation of this file.
1//===- GVNSink.cpp - sink expressions into successors ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file GVNSink.cpp
10/// This pass attempts to sink instructions into successors, reducing static
11/// instruction count and enabling if-conversion.
12///
13/// We use a variant of global value numbering to decide what can be sunk.
14/// Consider:
15///
16/// [ %a1 = add i32 %b, 1 ] [ %c1 = add i32 %d, 1 ]
17/// [ %a2 = xor i32 %a1, 1 ] [ %c2 = xor i32 %c1, 1 ]
18/// \ /
19/// [ %e = phi i32 %a2, %c2 ]
20/// [ add i32 %e, 4 ]
21///
22///
23/// GVN would number %a1 and %c1 differently because they compute different
24/// results - the VN of an instruction is a function of its opcode and the
25/// transitive closure of its operands. This is the key property for hoisting
26/// and CSE.
27///
28/// What we want when sinking however is for a numbering that is a function of
29/// the *uses* of an instruction, which allows us to answer the question "if I
30/// replace %a1 with %c1, will it contribute in an equivalent way to all
31/// successive instructions?". The PostValueTable class in GVN provides this
32/// mapping.
33//
34//===----------------------------------------------------------------------===//
35
36#include "llvm/ADT/ArrayRef.h"
37#include "llvm/ADT/DenseMap.h"
38#include "llvm/ADT/DenseSet.h"
39#include "llvm/ADT/Hashing.h"
41#include "llvm/ADT/STLExtras.h"
44#include "llvm/ADT/Statistic.h"
46#include "llvm/IR/BasicBlock.h"
47#include "llvm/IR/CFG.h"
48#include "llvm/IR/Constants.h"
49#include "llvm/IR/Function.h"
50#include "llvm/IR/InstrTypes.h"
51#include "llvm/IR/Instruction.h"
53#include "llvm/IR/PassManager.h"
54#include "llvm/IR/Type.h"
55#include "llvm/IR/Use.h"
56#include "llvm/IR/Value.h"
62#include "llvm/Support/Debug.h"
68#include <algorithm>
69#include <cassert>
70#include <cstddef>
71#include <cstdint>
72#include <iterator>
73#include <utility>
74
75using namespace llvm;
76
77#define DEBUG_TYPE "gvn-sink"
78
79STATISTIC(NumRemoved, "Number of instructions removed");
80
81namespace llvm {
82namespace GVNExpression {
83
85 print(dbgs());
86 dbgs() << "\n";
87}
88
89} // end namespace GVNExpression
90} // end namespace llvm
91
92namespace {
93
94static bool isMemoryInst(const Instruction *I) {
95 return isa<LoadInst>(I) || isa<StoreInst>(I) ||
96 (isa<InvokeInst>(I) && !cast<InvokeInst>(I)->doesNotAccessMemory()) ||
97 (isa<CallInst>(I) && !cast<CallInst>(I)->doesNotAccessMemory());
98}
99
100/// Iterates through instructions in a set of blocks in reverse order from the
101/// first non-terminator. For example (assume all blocks have size n):
102/// LockstepReverseIterator I([B1, B2, B3]);
103/// *I-- = [B1[n], B2[n], B3[n]];
104/// *I-- = [B1[n-1], B2[n-1], B3[n-1]];
105/// *I-- = [B1[n-2], B2[n-2], B3[n-2]];
106/// ...
107///
108/// It continues until all blocks have been exhausted. Use \c getActiveBlocks()
109/// to
110/// determine which blocks are still going and the order they appear in the
111/// list returned by operator*.
112class LockstepReverseIterator {
116 bool Fail;
117
118public:
119 LockstepReverseIterator(ArrayRef<BasicBlock *> Blocks) : Blocks(Blocks) {
120 reset();
121 }
122
123 void reset() {
124 Fail = false;
125 ActiveBlocks.clear();
126 for (BasicBlock *BB : Blocks)
127 ActiveBlocks.insert(BB);
128 Insts.clear();
129 for (BasicBlock *BB : Blocks) {
130 if (BB->size() <= 1) {
131 // Block wasn't big enough - only contained a terminator.
132 ActiveBlocks.remove(BB);
133 continue;
134 }
135 Insts.push_back(BB->getTerminator()->getPrevNonDebugInstruction());
136 }
137 if (Insts.empty())
138 Fail = true;
139 }
140
141 bool isValid() const { return !Fail; }
142 ArrayRef<Instruction *> operator*() const { return Insts; }
143
144 // Note: This needs to return a SmallSetVector as the elements of
145 // ActiveBlocks will be later copied to Blocks using std::copy. The
146 // resultant order of elements in Blocks needs to be deterministic.
147 // Using SmallPtrSet instead causes non-deterministic order while
148 // copying. And we cannot simply sort Blocks as they need to match the
149 // corresponding Values.
150 SmallSetVector<BasicBlock *, 4> &getActiveBlocks() { return ActiveBlocks; }
151
152 void restrictToBlocks(SmallSetVector<BasicBlock *, 4> &Blocks) {
153 for (auto II = Insts.begin(); II != Insts.end();) {
154 if (!Blocks.contains((*II)->getParent())) {
155 ActiveBlocks.remove((*II)->getParent());
156 II = Insts.erase(II);
157 } else {
158 ++II;
159 }
160 }
161 }
162
163 void operator--() {
164 if (Fail)
165 return;
167 for (auto *Inst : Insts) {
168 if (Inst == &Inst->getParent()->front())
169 ActiveBlocks.remove(Inst->getParent());
170 else
171 NewInsts.push_back(Inst->getPrevNonDebugInstruction());
172 }
173 if (NewInsts.empty()) {
174 Fail = true;
175 return;
176 }
177 Insts = NewInsts;
178 }
179};
180
181//===----------------------------------------------------------------------===//
182
183/// Candidate solution for sinking. There may be different ways to
184/// sink instructions, differing in the number of instructions sunk,
185/// the number of predecessors sunk from and the number of PHIs
186/// required.
187struct SinkingInstructionCandidate {
188 unsigned NumBlocks;
189 unsigned NumInstructions;
190 unsigned NumPHIs;
191 unsigned NumMemoryInsts;
192 int Cost = -1;
194
195 void calculateCost(unsigned NumOrigPHIs, unsigned NumOrigBlocks) {
196 unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
197 unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
198 Cost = (NumInstructions * (NumBlocks - 1)) -
199 (NumExtraPHIs *
200 NumExtraPHIs) // PHIs are expensive, so make sure they're worth it.
201 - SplitEdgeCost;
202 }
203
204 bool operator>(const SinkingInstructionCandidate &Other) const {
205 return Cost > Other.Cost;
206 }
207};
208
209#ifndef NDEBUG
210raw_ostream &operator<<(raw_ostream &OS, const SinkingInstructionCandidate &C) {
211 OS << "<Candidate Cost=" << C.Cost << " #Blocks=" << C.NumBlocks
212 << " #Insts=" << C.NumInstructions << " #PHIs=" << C.NumPHIs << ">";
213 return OS;
214}
215#endif
216
217//===----------------------------------------------------------------------===//
218
219/// Describes a PHI node that may or may not exist. These track the PHIs
220/// that must be created if we sunk a sequence of instructions. It provides
221/// a hash function for efficient equality comparisons.
222class ModelledPHI {
225
226public:
227 ModelledPHI() = default;
228
229 ModelledPHI(const PHINode *PN,
230 const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
231 // BasicBlock comes first so we sort by basic block pointer order,
232 // then by value pointer order. No need to call `verifyModelledPHI`
233 // As the Values and Blocks are populated in a deterministic order.
234 using OpsType = std::pair<BasicBlock *, Value *>;
236 for (unsigned I = 0, E = PN->getNumIncomingValues(); I != E; ++I)
238
239 auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
240 return BlockOrder.lookup(O1.first) < BlockOrder.lookup(O2.first);
241 };
242 // Sort in a deterministic order.
243 llvm::sort(Ops, ComesBefore);
244
245 for (auto &P : Ops) {
246 Blocks.push_back(P.first);
247 Values.push_back(P.second);
248 }
249 }
250
251 /// Create a dummy ModelledPHI that will compare unequal to any other ModelledPHI
252 /// without the same ID.
253 /// \note This is specifically for DenseMapInfo - do not use this!
254 static ModelledPHI createDummy(size_t ID) {
255 ModelledPHI M;
256 M.Values.push_back(reinterpret_cast<Value*>(ID));
257 return M;
258 }
259
260 void
261 verifyModelledPHI(const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
262 assert(Values.size() > 1 && Blocks.size() > 1 &&
263 "Modelling PHI with less than 2 values");
264 auto ComesBefore = [BlockOrder](const BasicBlock *BB1,
265 const BasicBlock *BB2) {
266 return BlockOrder.lookup(BB1) < BlockOrder.lookup(BB2);
267 };
268 assert(llvm::is_sorted(Blocks, ComesBefore));
269 int C = 0;
270 for (const Value *V : Values) {
271 if (!isa<UndefValue>(V)) {
272 assert(cast<Instruction>(V)->getParent() == Blocks[C]);
273 (void)C;
274 }
275 C++;
276 }
277 }
278 /// Create a PHI from an array of incoming values and incoming blocks.
279 ModelledPHI(SmallVectorImpl<Instruction *> &V,
281 const DenseMap<const BasicBlock *, unsigned> &BlockOrder) {
282 // The order of Values and Blocks are already ordered by the caller.
283 llvm::copy(V, std::back_inserter(Values));
284 llvm::copy(B, std::back_inserter(Blocks));
285 verifyModelledPHI(BlockOrder);
286 }
287
288 /// Create a PHI from [I[OpNum] for I in Insts].
289 /// TODO: Figure out a way to verifyModelledPHI in this constructor.
290 ModelledPHI(ArrayRef<Instruction *> Insts, unsigned OpNum,
292 llvm::copy(B, std::back_inserter(Blocks));
293 for (auto *I : Insts)
294 Values.push_back(I->getOperand(OpNum));
295 }
296
297 /// Restrict the PHI's contents down to only \c NewBlocks.
298 /// \c NewBlocks must be a subset of \c this->Blocks.
299 void restrictToBlocks(const SmallSetVector<BasicBlock *, 4> &NewBlocks) {
300 auto BI = Blocks.begin();
301 auto VI = Values.begin();
302 while (BI != Blocks.end()) {
303 assert(VI != Values.end());
304 if (!NewBlocks.contains(*BI)) {
305 BI = Blocks.erase(BI);
306 VI = Values.erase(VI);
307 } else {
308 ++BI;
309 ++VI;
310 }
311 }
312 assert(Blocks.size() == NewBlocks.size());
313 }
314
315 ArrayRef<Value *> getValues() const { return Values; }
316
317 bool areAllIncomingValuesSame() const {
318 return llvm::all_equal(Values);
319 }
320
321 bool areAllIncomingValuesSameType() const {
322 return llvm::all_of(
323 Values, [&](Value *V) { return V->getType() == Values[0]->getType(); });
324 }
325
326 bool areAnyIncomingValuesConstant() const {
327 return llvm::any_of(Values, [&](Value *V) { return isa<Constant>(V); });
328 }
329
330 // Hash functor
331 unsigned hash() const {
332 // Is deterministic because Values are saved in a specific order.
333 return (unsigned)hash_combine_range(Values.begin(), Values.end());
334 }
335
336 bool operator==(const ModelledPHI &Other) const {
337 return Values == Other.Values && Blocks == Other.Blocks;
338 }
339};
340
341template <typename ModelledPHI> struct DenseMapInfo {
342 static inline ModelledPHI &getEmptyKey() {
343 static ModelledPHI Dummy = ModelledPHI::createDummy(0);
344 return Dummy;
345 }
346
347 static inline ModelledPHI &getTombstoneKey() {
348 static ModelledPHI Dummy = ModelledPHI::createDummy(1);
349 return Dummy;
350 }
351
352 static unsigned getHashValue(const ModelledPHI &V) { return V.hash(); }
353
354 static bool isEqual(const ModelledPHI &LHS, const ModelledPHI &RHS) {
355 return LHS == RHS;
356 }
357};
358
360
361//===----------------------------------------------------------------------===//
362// ValueTable
363//===----------------------------------------------------------------------===//
364// This is a value number table where the value number is a function of the
365// *uses* of a value, rather than its operands. Thus, if VN(A) == VN(B) we know
366// that the program would be equivalent if we replaced A with PHI(A, B).
367//===----------------------------------------------------------------------===//
368
369/// A GVN expression describing how an instruction is used. The operands
370/// field of BasicExpression is used to store uses, not operands.
371///
372/// This class also contains fields for discriminators used when determining
373/// equivalence of instructions with sideeffects.
374class InstructionUseExpr : public GVNExpression::BasicExpression {
375 unsigned MemoryUseOrder = -1;
376 bool Volatile = false;
377 ArrayRef<int> ShuffleMask;
378
379public:
380 InstructionUseExpr(Instruction *I, ArrayRecycler<Value *> &R,
382 : GVNExpression::BasicExpression(I->getNumUses()) {
384 setOpcode(I->getOpcode());
385 setType(I->getType());
386
387 if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I))
388 ShuffleMask = SVI->getShuffleMask().copy(A);
389
390 for (auto &U : I->uses())
391 op_push_back(U.getUser());
393 }
394
395 void setMemoryUseOrder(unsigned MUO) { MemoryUseOrder = MUO; }
396 void setVolatile(bool V) { Volatile = V; }
397
398 hash_code getHashValue() const override {
400 MemoryUseOrder, Volatile, ShuffleMask);
401 }
402
403 template <typename Function> hash_code getHashValue(Function MapFn) {
404 hash_code H = hash_combine(getOpcode(), getType(), MemoryUseOrder, Volatile,
405 ShuffleMask);
406 for (auto *V : operands())
407 H = hash_combine(H, MapFn(V));
408 return H;
409 }
410};
411
412using BasicBlocksSet = SmallPtrSet<const BasicBlock *, 32>;
413
414class ValueTable {
415 DenseMap<Value *, uint32_t> ValueNumbering;
417 DenseMap<size_t, uint32_t> HashNumbering;
420 uint32_t nextValueNumber = 1;
421 BasicBlocksSet ReachableBBs;
422
423 /// Create an expression for I based on its opcode and its uses. If I
424 /// touches or reads memory, the expression is also based upon its memory
425 /// order - see \c getMemoryUseOrder().
426 InstructionUseExpr *createExpr(Instruction *I) {
427 InstructionUseExpr *E =
428 new (Allocator) InstructionUseExpr(I, Recycler, Allocator);
429 if (isMemoryInst(I))
430 E->setMemoryUseOrder(getMemoryUseOrder(I));
431
432 if (CmpInst *C = dyn_cast<CmpInst>(I)) {
433 CmpInst::Predicate Predicate = C->getPredicate();
434 E->setOpcode((C->getOpcode() << 8) | Predicate);
435 }
436 return E;
437 }
438
439 /// Helper to compute the value number for a memory instruction
440 /// (LoadInst/StoreInst), including checking the memory ordering and
441 /// volatility.
442 template <class Inst> InstructionUseExpr *createMemoryExpr(Inst *I) {
443 if (isStrongerThanUnordered(I->getOrdering()) || I->isAtomic())
444 return nullptr;
445 InstructionUseExpr *E = createExpr(I);
446 E->setVolatile(I->isVolatile());
447 return E;
448 }
449
450public:
451 ValueTable() = default;
452
453 /// Set basic blocks reachable from entry block.
454 void setReachableBBs(const BasicBlocksSet &ReachableBBs) {
455 this->ReachableBBs = ReachableBBs;
456 }
457
458 /// Returns the value number for the specified value, assigning
459 /// it a new number if it did not have one before.
460 uint32_t lookupOrAdd(Value *V) {
461 auto VI = ValueNumbering.find(V);
462 if (VI != ValueNumbering.end())
463 return VI->second;
464
465 if (!isa<Instruction>(V)) {
466 ValueNumbering[V] = nextValueNumber;
467 return nextValueNumber++;
468 }
469
470 Instruction *I = cast<Instruction>(V);
471 if (!ReachableBBs.contains(I->getParent()))
472 return ~0U;
473
474 InstructionUseExpr *exp = nullptr;
475 switch (I->getOpcode()) {
476 case Instruction::Load:
477 exp = createMemoryExpr(cast<LoadInst>(I));
478 break;
479 case Instruction::Store:
480 exp = createMemoryExpr(cast<StoreInst>(I));
481 break;
482 case Instruction::Call:
483 case Instruction::Invoke:
484 case Instruction::FNeg:
485 case Instruction::Add:
486 case Instruction::FAdd:
487 case Instruction::Sub:
488 case Instruction::FSub:
489 case Instruction::Mul:
490 case Instruction::FMul:
491 case Instruction::UDiv:
492 case Instruction::SDiv:
493 case Instruction::FDiv:
494 case Instruction::URem:
495 case Instruction::SRem:
496 case Instruction::FRem:
497 case Instruction::Shl:
498 case Instruction::LShr:
499 case Instruction::AShr:
500 case Instruction::And:
501 case Instruction::Or:
502 case Instruction::Xor:
503 case Instruction::ICmp:
504 case Instruction::FCmp:
505 case Instruction::Trunc:
506 case Instruction::ZExt:
507 case Instruction::SExt:
508 case Instruction::FPToUI:
509 case Instruction::FPToSI:
510 case Instruction::UIToFP:
511 case Instruction::SIToFP:
512 case Instruction::FPTrunc:
513 case Instruction::FPExt:
514 case Instruction::PtrToInt:
515 case Instruction::IntToPtr:
516 case Instruction::BitCast:
517 case Instruction::AddrSpaceCast:
518 case Instruction::Select:
519 case Instruction::ExtractElement:
520 case Instruction::InsertElement:
521 case Instruction::ShuffleVector:
522 case Instruction::InsertValue:
523 case Instruction::GetElementPtr:
524 exp = createExpr(I);
525 break;
526 default:
527 break;
528 }
529
530 if (!exp) {
531 ValueNumbering[V] = nextValueNumber;
532 return nextValueNumber++;
533 }
534
535 uint32_t e = ExpressionNumbering[exp];
536 if (!e) {
537 hash_code H = exp->getHashValue([=](Value *V) { return lookupOrAdd(V); });
538 auto I = HashNumbering.find(H);
539 if (I != HashNumbering.end()) {
540 e = I->second;
541 } else {
542 e = nextValueNumber++;
543 HashNumbering[H] = e;
544 ExpressionNumbering[exp] = e;
545 }
546 }
547 ValueNumbering[V] = e;
548 return e;
549 }
550
551 /// Returns the value number of the specified value. Fails if the value has
552 /// not yet been numbered.
553 uint32_t lookup(Value *V) const {
554 auto VI = ValueNumbering.find(V);
555 assert(VI != ValueNumbering.end() && "Value not numbered?");
556 return VI->second;
557 }
558
559 /// Removes all value numberings and resets the value table.
560 void clear() {
561 ValueNumbering.clear();
562 ExpressionNumbering.clear();
563 HashNumbering.clear();
564 Recycler.clear(Allocator);
565 nextValueNumber = 1;
566 }
567
568 /// \c Inst uses or touches memory. Return an ID describing the memory state
569 /// at \c Inst such that if getMemoryUseOrder(I1) == getMemoryUseOrder(I2),
570 /// the exact same memory operations happen after I1 and I2.
571 ///
572 /// This is a very hard problem in general, so we use domain-specific
573 /// knowledge that we only ever check for equivalence between blocks sharing a
574 /// single immediate successor that is common, and when determining if I1 ==
575 /// I2 we will have already determined that next(I1) == next(I2). This
576 /// inductive property allows us to simply return the value number of the next
577 /// instruction that defines memory.
578 uint32_t getMemoryUseOrder(Instruction *Inst) {
579 auto *BB = Inst->getParent();
580 for (auto I = std::next(Inst->getIterator()), E = BB->end();
581 I != E && !I->isTerminator(); ++I) {
582 if (!isMemoryInst(&*I))
583 continue;
584 if (isa<LoadInst>(&*I))
585 continue;
586 CallInst *CI = dyn_cast<CallInst>(&*I);
587 if (CI && CI->onlyReadsMemory())
588 continue;
589 InvokeInst *II = dyn_cast<InvokeInst>(&*I);
590 if (II && II->onlyReadsMemory())
591 continue;
592 return lookupOrAdd(&*I);
593 }
594 return 0;
595 }
596};
597
598//===----------------------------------------------------------------------===//
599
600class GVNSink {
601public:
602 GVNSink() {}
603
604 bool run(Function &F) {
605 LLVM_DEBUG(dbgs() << "GVNSink: running on function @" << F.getName()
606 << "\n");
607
608 unsigned NumSunk = 0;
610 VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
611 // Populate reverse post-order to order basic blocks in deterministic
612 // order. Any arbitrary ordering will work in this case as long as they are
613 // deterministic. The node ordering of newly created basic blocks
614 // are irrelevant because RPOT(for computing sinkable candidates) is also
615 // obtained ahead of time and only their order are relevant for this pass.
616 unsigned NodeOrdering = 0;
617 RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
618 for (auto *BB : RPOT)
619 if (!pred_empty(BB))
620 RPOTOrder[BB] = ++NodeOrdering;
621 for (auto *N : RPOT)
622 NumSunk += sinkBB(N);
623
624 return NumSunk > 0;
625 }
626
627private:
628 ValueTable VN;
630
631 bool shouldAvoidSinkingInstruction(Instruction *I) {
632 // These instructions may change or break semantics if moved.
633 if (isa<PHINode>(I) || I->isEHPad() || isa<AllocaInst>(I) ||
634 I->getType()->isTokenTy())
635 return true;
636 return false;
637 }
638
639 /// The main heuristic function. Analyze the set of instructions pointed to by
640 /// LRI and return a candidate solution if these instructions can be sunk, or
641 /// std::nullopt otherwise.
642 std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
643 LockstepReverseIterator &LRI, unsigned &InstNum, unsigned &MemoryInstNum,
644 ModelledPHISet &NeededPHIs, SmallPtrSetImpl<Value *> &PHIContents);
645
646 /// Create a ModelledPHI for each PHI in BB, adding to PHIs.
647 void analyzeInitialPHIs(BasicBlock *BB, ModelledPHISet &PHIs,
648 SmallPtrSetImpl<Value *> &PHIContents) {
649 for (PHINode &PN : BB->phis()) {
650 auto MPHI = ModelledPHI(&PN, RPOTOrder);
651 PHIs.insert(MPHI);
652 for (auto *V : MPHI.getValues())
653 PHIContents.insert(V);
654 }
655 }
656
657 /// The main instruction sinking driver. Set up state and try and sink
658 /// instructions into BBEnd from its predecessors.
659 unsigned sinkBB(BasicBlock *BBEnd);
660
661 /// Perform the actual mechanics of sinking an instruction from Blocks into
662 /// BBEnd, which is their only successor.
664
665 /// Remove PHIs that all have the same incoming value.
666 void foldPointlessPHINodes(BasicBlock *BB) {
667 auto I = BB->begin();
668 while (PHINode *PN = dyn_cast<PHINode>(I++)) {
669 if (!llvm::all_of(PN->incoming_values(), [&](const Value *V) {
670 return V == PN->getIncomingValue(0);
671 }))
672 continue;
673 if (PN->getIncomingValue(0) != PN)
675 else
677 PN->eraseFromParent();
678 }
679 }
680};
681
682std::optional<SinkingInstructionCandidate>
683GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
684 unsigned &InstNum,
685 unsigned &MemoryInstNum,
686 ModelledPHISet &NeededPHIs,
687 SmallPtrSetImpl<Value *> &PHIContents) {
688 auto Insts = *LRI;
689 LLVM_DEBUG(dbgs() << " -- Analyzing instruction set: [\n"; for (auto *I
690 : Insts) {
691 I->dump();
692 } dbgs() << " ]\n";);
693
695 for (auto *I : Insts) {
696 uint32_t N = VN.lookupOrAdd(I);
697 LLVM_DEBUG(dbgs() << " VN=" << Twine::utohexstr(N) << " for" << *I << "\n");
698 if (N == ~0U)
699 return std::nullopt;
700 VNums[N]++;
701 }
702 unsigned VNumToSink = llvm::max_element(VNums, llvm::less_second())->first;
703
704 if (VNums[VNumToSink] == 1)
705 // Can't sink anything!
706 return std::nullopt;
707
708 // Now restrict the number of incoming blocks down to only those with
709 // VNumToSink.
710 auto &ActivePreds = LRI.getActiveBlocks();
711 unsigned InitialActivePredSize = ActivePreds.size();
713 for (auto *I : Insts) {
714 if (VN.lookup(I) != VNumToSink)
715 ActivePreds.remove(I->getParent());
716 else
717 NewInsts.push_back(I);
718 }
719 for (auto *I : NewInsts)
720 if (shouldAvoidSinkingInstruction(I))
721 return std::nullopt;
722
723 // If we've restricted the incoming blocks, restrict all needed PHIs also
724 // to that set.
725 bool RecomputePHIContents = false;
726 if (ActivePreds.size() != InitialActivePredSize) {
727 ModelledPHISet NewNeededPHIs;
728 for (auto P : NeededPHIs) {
729 P.restrictToBlocks(ActivePreds);
730 NewNeededPHIs.insert(P);
731 }
732 NeededPHIs = NewNeededPHIs;
733 LRI.restrictToBlocks(ActivePreds);
734 RecomputePHIContents = true;
735 }
736
737 // The sunk instruction's results.
738 ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);
739
740 // Does sinking this instruction render previous PHIs redundant?
741 if (NeededPHIs.erase(NewPHI))
742 RecomputePHIContents = true;
743
744 if (RecomputePHIContents) {
745 // The needed PHIs have changed, so recompute the set of all needed
746 // values.
747 PHIContents.clear();
748 for (auto &PHI : NeededPHIs)
749 PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
750 }
751
752 // Is this instruction required by a later PHI that doesn't match this PHI?
753 // if so, we can't sink this instruction.
754 for (auto *V : NewPHI.getValues())
755 if (PHIContents.count(V))
756 // V exists in this PHI, but the whole PHI is different to NewPHI
757 // (else it would have been removed earlier). We cannot continue
758 // because this isn't representable.
759 return std::nullopt;
760
761 // Which operands need PHIs?
762 // FIXME: If any of these fail, we should partition up the candidates to
763 // try and continue making progress.
764 Instruction *I0 = NewInsts[0];
765
766 auto isNotSameOperation = [&I0](Instruction *I) {
767 return !I0->isSameOperationAs(I);
768 };
769
770 if (any_of(NewInsts, isNotSameOperation))
771 return std::nullopt;
772
773 for (unsigned OpNum = 0, E = I0->getNumOperands(); OpNum != E; ++OpNum) {
774 ModelledPHI PHI(NewInsts, OpNum, ActivePreds);
775 if (PHI.areAllIncomingValuesSame())
776 continue;
777 if (!canReplaceOperandWithVariable(I0, OpNum))
778 // We can 't create a PHI from this instruction!
779 return std::nullopt;
780 if (NeededPHIs.count(PHI))
781 continue;
782 if (!PHI.areAllIncomingValuesSameType())
783 return std::nullopt;
784 // Don't create indirect calls! The called value is the final operand.
785 if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
786 PHI.areAnyIncomingValuesConstant())
787 return std::nullopt;
788
789 NeededPHIs.reserve(NeededPHIs.size());
790 NeededPHIs.insert(PHI);
791 PHIContents.insert(PHI.getValues().begin(), PHI.getValues().end());
792 }
793
794 if (isMemoryInst(NewInsts[0]))
795 ++MemoryInstNum;
796
797 SinkingInstructionCandidate Cand;
798 Cand.NumInstructions = ++InstNum;
799 Cand.NumMemoryInsts = MemoryInstNum;
800 Cand.NumBlocks = ActivePreds.size();
801 Cand.NumPHIs = NeededPHIs.size();
802 append_range(Cand.Blocks, ActivePreds);
803
804 return Cand;
805}
806
807unsigned GVNSink::sinkBB(BasicBlock *BBEnd) {
808 LLVM_DEBUG(dbgs() << "GVNSink: running on basic block ";
809 BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
811 for (auto *B : predecessors(BBEnd)) {
812 // Bailout on basic blocks without predecessor(PR42346).
813 if (!RPOTOrder.count(B))
814 return 0;
815 auto *T = B->getTerminator();
816 if (isa<BranchInst>(T) || isa<SwitchInst>(T))
817 Preds.push_back(B);
818 else
819 return 0;
820 }
821 if (Preds.size() < 2)
822 return 0;
823 auto ComesBefore = [this](const BasicBlock *BB1, const BasicBlock *BB2) {
824 return RPOTOrder.lookup(BB1) < RPOTOrder.lookup(BB2);
825 };
826 // Sort in a deterministic order.
827 llvm::sort(Preds, ComesBefore);
828
829 unsigned NumOrigPreds = Preds.size();
830 // We can only sink instructions through unconditional branches.
831 llvm::erase_if(Preds, [](BasicBlock *BB) {
832 return BB->getTerminator()->getNumSuccessors() != 1;
833 });
834
835 LockstepReverseIterator LRI(Preds);
837 unsigned InstNum = 0, MemoryInstNum = 0;
838 ModelledPHISet NeededPHIs;
839 SmallPtrSet<Value *, 4> PHIContents;
840 analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
841 unsigned NumOrigPHIs = NeededPHIs.size();
842
843 while (LRI.isValid()) {
844 auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
845 NeededPHIs, PHIContents);
846 if (!Cand)
847 break;
848 Cand->calculateCost(NumOrigPHIs, Preds.size());
849 Candidates.emplace_back(*Cand);
850 --LRI;
851 }
852
853 llvm::stable_sort(Candidates, std::greater<SinkingInstructionCandidate>());
854 LLVM_DEBUG(dbgs() << " -- Sinking candidates:\n"; for (auto &C
855 : Candidates) dbgs()
856 << " " << C << "\n";);
857
858 // Pick the top candidate, as long it is positive!
859 if (Candidates.empty() || Candidates.front().Cost <= 0)
860 return 0;
861 auto C = Candidates.front();
862
863 LLVM_DEBUG(dbgs() << " -- Sinking: " << C << "\n");
864 BasicBlock *InsertBB = BBEnd;
865 if (C.Blocks.size() < NumOrigPreds) {
866 LLVM_DEBUG(dbgs() << " -- Splitting edge to ";
867 BBEnd->printAsOperand(dbgs()); dbgs() << "\n");
868 InsertBB = SplitBlockPredecessors(BBEnd, C.Blocks, ".gvnsink.split");
869 if (!InsertBB) {
870 LLVM_DEBUG(dbgs() << " -- FAILED to split edge!\n");
871 // Edge couldn't be split.
872 return 0;
873 }
874 }
875
876 for (unsigned I = 0; I < C.NumInstructions; ++I)
877 sinkLastInstruction(C.Blocks, InsertBB);
878
879 return C.NumInstructions;
880}
881
882void GVNSink::sinkLastInstruction(ArrayRef<BasicBlock *> Blocks,
883 BasicBlock *BBEnd) {
885 for (BasicBlock *BB : Blocks)
887 Instruction *I0 = Insts.front();
888
889 SmallVector<Value *, 4> NewOperands;
890 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O) {
891 bool NeedPHI = llvm::any_of(Insts, [&I0, O](const Instruction *I) {
892 return I->getOperand(O) != I0->getOperand(O);
893 });
894 if (!NeedPHI) {
895 NewOperands.push_back(I0->getOperand(O));
896 continue;
897 }
898
899 // Create a new PHI in the successor block and populate it.
900 auto *Op = I0->getOperand(O);
901 assert(!Op->getType()->isTokenTy() && "Can't PHI tokens!");
902 auto *PN =
903 PHINode::Create(Op->getType(), Insts.size(), Op->getName() + ".sink");
904 PN->insertBefore(BBEnd->begin());
905 for (auto *I : Insts)
906 PN->addIncoming(I->getOperand(O), I->getParent());
907 NewOperands.push_back(PN);
908 }
909
910 // Arbitrarily use I0 as the new "common" instruction; remap its operands
911 // and move it to the start of the successor block.
912 for (unsigned O = 0, E = I0->getNumOperands(); O != E; ++O)
913 I0->getOperandUse(O).set(NewOperands[O]);
914 I0->moveBefore(&*BBEnd->getFirstInsertionPt());
915
916 // Update metadata and IR flags.
917 for (auto *I : Insts)
918 if (I != I0) {
919 combineMetadataForCSE(I0, I, true);
920 I0->andIRFlags(I);
921 }
922
923 for (auto *I : Insts)
924 if (I != I0) {
925 I->replaceAllUsesWith(I0);
926 I0->applyMergedLocation(I0->getDebugLoc(), I->getDebugLoc());
927 }
928 foldPointlessPHINodes(BBEnd);
929
930 // Finally nuke all instructions apart from the common instruction.
931 for (auto *I : Insts)
932 if (I != I0)
933 I->eraseFromParent();
934
935 NumRemoved += Insts.size() - 1;
936}
937
938} // end anonymous namespace
939
941 GVNSink G;
942 if (!G.run(F))
943 return PreservedAnalyses::all();
944
946}
#define Fail
Rewrite undef for PHI
This file defines the BumpPtrAllocator interface.
Atomic ordering constants.
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:537
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
Definition: Coroutines.cpp:148
#define LLVM_DEBUG(X)
Definition: Debug.h:101
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::optional< std::vector< StOtherPiece > > Other
Definition: ELFYAML.cpp:1294
DenseMap< Block *, BlockRelaxAux > Blocks
Definition: ELF_riscv.cpp:507
The header file for the GVN pass that contains expression handling classes.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This defines the Use class.
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
Definition: InlineInfo.cpp:109
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
#define H(x, y, z)
Definition: MD5.cpp:57
uint64_t IntrinsicInst * II
#define P(N)
This header defines various interfaces for pass management in LLVM.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
Basic Register Allocator
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
raw_pwrite_stream & OS
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static SymbolRef::Type getType(const Symbol *Sym)
Definition: TapiFile.cpp:40
Value * RHS
Value * LHS
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
Recycle small arrays allocated from a BumpPtrAllocator.
Definition: ArrayRecycler.h:28
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
MutableArrayRef< T > copy(Allocator &A)
Definition: ArrayRef.h:180
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
Definition: BasicBlock.h:517
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:416
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:239
Allocate memory in an ever growing pool, as if by bump-pointer.
Definition: Allocator.h:66
bool onlyReadsMemory(unsigned OpNo) const
Definition: InstrTypes.h:1816
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Definition: InstrTypes.h:747
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:757
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
Definition: DenseMap.h:194
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:151
iterator end()
Definition: DenseMap.h:84
Implements a dense probed hash-table based set.
Definition: DenseSet.h:271
void allocateOperands(RecyclerType &Recycler, BumpPtrAllocator &Allocator)
hash_code getHashValue() const override
iterator_range< op_iterator > operands()
LLVM_DUMP_METHOD void dump() const
Definition: GVNSink.cpp:84
void setOpcode(unsigned opcode)
void print(raw_ostream &OS) const
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:97
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:466
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
Definition: DebugInfo.cpp:932
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Invoke instruction.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
Recycler - This class manages a linked-list of deallocated nodes and facilitates reusing deallocated ...
Definition: Recycler.h:34
void clear(AllocatorType &Allocator)
clear - Release all the tracked allocations to the allocator.
Definition: Recycler.h:68
bool remove(const value_type &X)
Remove an item from the set vector.
Definition: SetVector.h:188
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
void clear()
Completely clear the SetVector.
Definition: SetVector.h:273
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
Definition: SetVector.h:254
This instruction constructs a fixed permutation of two input vectors.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:347
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
A SetVector that performs no allocations if smaller than a certain size.
Definition: SetVector.h:370
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:951
iterator erase(const_iterator CI)
Definition: SmallVector.h:751
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
static Twine utohexstr(const uint64_t &Val)
Definition: Twine.h:416
void set(Value *Val)
Definition: Value.h:882
const Use & getOperandUse(unsigned i) const
Definition: User.h:182
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
Definition: AsmWriter.cpp:5106
An opaque object representing a hash code.
Definition: Hashing.h:75
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
ArchKind & operator--(ArchKind &Kind)
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Volatile
Definition: NVPTX.h:123
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:47
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void stable_sort(R &&Range)
Definition: STLExtras.h:1995
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
APInt operator*(APInt a, uint64_t RHS)
Definition: APInt.h:2184
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
bool isStrongerThanUnordered(AtomicOrdering AO)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
bool operator>(int64_t V1, const APSInt &V2)
Definition: APSInt.h:362
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1647
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1902
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
Definition: Local.cpp:3345
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
Definition: Local.cpp:4113
auto max_element(R &&Range)
Definition: STLExtras.h:1986
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
OutputIt copy(R &&Range, OutputIt Out)
Definition: STLExtras.h:1824
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2057
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:118
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
Definition: STLExtras.h:2045
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
Definition: Hashing.h:593
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
Definition: Hashing.h:471
#define N
An information struct used to provide DenseMap with the various necessary components for a given valu...
Definition: DenseMapInfo.h:52
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Definition: GVNSink.cpp:940
Function object to check whether the second component of a container supported by std::get (like std:...
Definition: STLExtras.h:1459