77#define DEBUG_TYPE "gvn-sink"
79STATISTIC(NumRemoved,
"Number of instructions removed");
82namespace GVNExpression {
95 return isa<LoadInst>(
I) || isa<StoreInst>(
I) ||
96 (isa<InvokeInst>(
I) && !cast<InvokeInst>(
I)->doesNotAccessMemory()) ||
97 (isa<CallInst>(
I) && !cast<CallInst>(
I)->doesNotAccessMemory());
112class LockstepReverseIterator {
125 ActiveBlocks.
clear();
130 if (BB->size() <= 1) {
135 Insts.
push_back(BB->getTerminator()->getPrevNonDebugInstruction());
154 if (!
Blocks.contains((*II)->getParent())) {
155 ActiveBlocks.
remove((*II)->getParent());
167 for (
auto *Inst : Insts) {
168 if (Inst == &Inst->getParent()->front())
169 ActiveBlocks.
remove(Inst->getParent());
171 NewInsts.
push_back(Inst->getPrevNonDebugInstruction());
173 if (NewInsts.
empty()) {
187struct SinkingInstructionCandidate {
189 unsigned NumInstructions;
191 unsigned NumMemoryInsts;
195 void calculateCost(
unsigned NumOrigPHIs,
unsigned NumOrigBlocks) {
196 unsigned NumExtraPHIs = NumPHIs - NumOrigPHIs;
197 unsigned SplitEdgeCost = (NumOrigBlocks > NumBlocks) ? 2 : 0;
198 Cost = (NumInstructions * (NumBlocks - 1)) -
211 OS <<
"<Candidate Cost=" <<
C.Cost <<
" #Blocks=" <<
C.NumBlocks
212 <<
" #Insts=" <<
C.NumInstructions <<
" #PHIs=" <<
C.NumPHIs <<
">";
227 ModelledPHI() =
default;
234 using OpsType = std::pair<BasicBlock *, Value *>;
239 auto ComesBefore = [BlockOrder](OpsType O1, OpsType O2) {
240 return BlockOrder.
lookup(O1.first) < BlockOrder.
lookup(O2.first);
245 for (
auto &
P : Ops) {
254 static ModelledPHI createDummy(
size_t ID) {
256 M.Values.push_back(
reinterpret_cast<Value*
>(
ID));
263 "Modelling PHI with less than 2 values");
264 auto ComesBefore = [BlockOrder](
const BasicBlock *BB1,
270 for (
const Value *V : Values) {
271 if (!isa<UndefValue>(V)) {
285 verifyModelledPHI(BlockOrder);
293 for (
auto *
I : Insts)
294 Values.push_back(
I->getOperand(OpNum));
301 auto VI = Values.begin();
302 while (BI !=
Blocks.end()) {
303 assert(VI != Values.end());
306 VI = Values.erase(VI);
317 bool areAllIncomingValuesSame()
const {
321 bool areAllIncomingValuesSameType()
const {
323 Values, [&](
Value *V) {
return V->getType() == Values[0]->
getType(); });
326 bool areAnyIncomingValuesConstant()
const {
331 unsigned hash()
const {
342 static inline ModelledPHI &getEmptyKey() {
343 static ModelledPHI
Dummy = ModelledPHI::createDummy(0);
347 static inline ModelledPHI &getTombstoneKey() {
348 static ModelledPHI
Dummy = ModelledPHI::createDummy(1);
352 static unsigned getHashValue(
const ModelledPHI &V) {
return V.hash(); }
354 static bool isEqual(
const ModelledPHI &LHS,
const ModelledPHI &RHS) {
375 unsigned MemoryUseOrder = -1;
382 : GVNExpression::BasicExpression(
I->getNumUses()) {
388 ShuffleMask = SVI->getShuffleMask().
copy(
A);
390 for (
auto &U :
I->uses())
395 void setMemoryUseOrder(
unsigned MUO) { MemoryUseOrder = MUO; }
396 void setVolatile(
bool V) {
Volatile =
V; }
400 MemoryUseOrder, Volatile, ShuffleMask);
421 BasicBlocksSet ReachableBBs;
427 InstructionUseExpr *E =
430 E->setMemoryUseOrder(getMemoryUseOrder(
I));
434 E->setOpcode((
C->getOpcode() << 8) |
Predicate);
442 template <
class Inst> InstructionUseExpr *createMemoryExpr(Inst *
I) {
445 InstructionUseExpr *E = createExpr(
I);
446 E->setVolatile(
I->isVolatile());
454 void setReachableBBs(
const BasicBlocksSet &ReachableBBs) {
455 this->ReachableBBs = ReachableBBs;
461 auto VI = ValueNumbering.
find(V);
462 if (VI != ValueNumbering.
end())
465 if (!isa<Instruction>(V)) {
466 ValueNumbering[
V] = nextValueNumber;
467 return nextValueNumber++;
471 if (!ReachableBBs.contains(
I->getParent()))
474 InstructionUseExpr *exp =
nullptr;
475 switch (
I->getOpcode()) {
476 case Instruction::Load:
477 exp = createMemoryExpr(cast<LoadInst>(
I));
479 case Instruction::Store:
480 exp = createMemoryExpr(cast<StoreInst>(
I));
482 case Instruction::Call:
483 case Instruction::Invoke:
484 case Instruction::FNeg:
485 case Instruction::Add:
486 case Instruction::FAdd:
487 case Instruction::Sub:
488 case Instruction::FSub:
489 case Instruction::Mul:
490 case Instruction::FMul:
491 case Instruction::UDiv:
492 case Instruction::SDiv:
493 case Instruction::FDiv:
494 case Instruction::URem:
495 case Instruction::SRem:
496 case Instruction::FRem:
497 case Instruction::Shl:
498 case Instruction::LShr:
499 case Instruction::AShr:
500 case Instruction::And:
501 case Instruction::Or:
502 case Instruction::Xor:
503 case Instruction::ICmp:
504 case Instruction::FCmp:
505 case Instruction::Trunc:
506 case Instruction::ZExt:
507 case Instruction::SExt:
508 case Instruction::FPToUI:
509 case Instruction::FPToSI:
510 case Instruction::UIToFP:
511 case Instruction::SIToFP:
512 case Instruction::FPTrunc:
513 case Instruction::FPExt:
514 case Instruction::PtrToInt:
515 case Instruction::IntToPtr:
516 case Instruction::BitCast:
517 case Instruction::AddrSpaceCast:
518 case Instruction::Select:
519 case Instruction::ExtractElement:
520 case Instruction::InsertElement:
521 case Instruction::ShuffleVector:
522 case Instruction::InsertValue:
523 case Instruction::GetElementPtr:
531 ValueNumbering[
V] = nextValueNumber;
532 return nextValueNumber++;
537 hash_code H = exp->getHashValue([=](
Value *V) {
return lookupOrAdd(V); });
538 auto I = HashNumbering.
find(
H);
539 if (
I != HashNumbering.
end()) {
542 e = nextValueNumber++;
543 HashNumbering[
H] =
e;
544 ExpressionNumbering[exp] =
e;
547 ValueNumbering[
V] =
e;
554 auto VI = ValueNumbering.
find(V);
555 assert(VI != ValueNumbering.
end() &&
"Value not numbered?");
561 ValueNumbering.
clear();
562 ExpressionNumbering.
clear();
563 HashNumbering.
clear();
580 for (
auto I = std::next(Inst->
getIterator()), E = BB->end();
581 I != E && !
I->isTerminator(); ++
I) {
582 if (!isMemoryInst(&*
I))
584 if (isa<LoadInst>(&*
I))
590 if (
II &&
II->onlyReadsMemory())
592 return lookupOrAdd(&*
I);
608 unsigned NumSunk = 0;
610 VN.setReachableBBs(BasicBlocksSet(RPOT.begin(), RPOT.end()));
616 unsigned NodeOrdering = 0;
617 RPOTOrder[*RPOT.begin()] = ++NodeOrdering;
618 for (
auto *BB : RPOT)
620 RPOTOrder[BB] = ++NodeOrdering;
622 NumSunk += sinkBB(
N);
633 if (isa<PHINode>(
I) ||
I->isEHPad() || isa<AllocaInst>(
I) ||
634 I->getType()->isTokenTy())
642 std::optional<SinkingInstructionCandidate> analyzeInstructionForSinking(
643 LockstepReverseIterator &LRI,
unsigned &InstNum,
unsigned &MemoryInstNum,
647 void analyzeInitialPHIs(
BasicBlock *BB, ModelledPHISet &PHIs,
650 auto MPHI = ModelledPHI(&PN, RPOTOrder);
652 for (
auto *V : MPHI.getValues())
668 while (
PHINode *PN = dyn_cast<PHINode>(
I++)) {
670 return V == PN->getIncomingValue(0);
682std::optional<SinkingInstructionCandidate>
683GVNSink::analyzeInstructionForSinking(LockstepReverseIterator &LRI,
685 unsigned &MemoryInstNum,
686 ModelledPHISet &NeededPHIs,
689 LLVM_DEBUG(
dbgs() <<
" -- Analyzing instruction set: [\n";
for (
auto *
I
692 }
dbgs() <<
" ]\n";);
695 for (
auto *
I : Insts) {
704 if (VNums[VNumToSink] == 1)
710 auto &ActivePreds = LRI.getActiveBlocks();
711 unsigned InitialActivePredSize = ActivePreds.size();
713 for (
auto *
I : Insts) {
714 if (VN.lookup(
I) != VNumToSink)
715 ActivePreds.remove(
I->getParent());
719 for (
auto *
I : NewInsts)
720 if (shouldAvoidSinkingInstruction(
I))
725 bool RecomputePHIContents =
false;
726 if (ActivePreds.size() != InitialActivePredSize) {
727 ModelledPHISet NewNeededPHIs;
728 for (
auto P : NeededPHIs) {
729 P.restrictToBlocks(ActivePreds);
730 NewNeededPHIs.insert(
P);
732 NeededPHIs = NewNeededPHIs;
733 LRI.restrictToBlocks(ActivePreds);
734 RecomputePHIContents =
true;
738 ModelledPHI NewPHI(NewInsts, ActivePreds, RPOTOrder);
741 if (NeededPHIs.erase(NewPHI))
742 RecomputePHIContents =
true;
744 if (RecomputePHIContents) {
748 for (
auto &
PHI : NeededPHIs)
749 PHIContents.
insert(
PHI.getValues().begin(),
PHI.getValues().end());
754 for (
auto *V : NewPHI.getValues())
755 if (PHIContents.
count(V))
770 if (
any_of(NewInsts, isNotSameOperation))
773 for (
unsigned OpNum = 0, E = I0->
getNumOperands(); OpNum != E; ++OpNum) {
774 ModelledPHI
PHI(NewInsts, OpNum, ActivePreds);
775 if (
PHI.areAllIncomingValuesSame())
780 if (NeededPHIs.count(
PHI))
782 if (!
PHI.areAllIncomingValuesSameType())
785 if ((isa<CallInst>(I0) || isa<InvokeInst>(I0)) && OpNum == E - 1 &&
786 PHI.areAnyIncomingValuesConstant())
789 NeededPHIs.reserve(NeededPHIs.size());
790 NeededPHIs.insert(
PHI);
791 PHIContents.
insert(
PHI.getValues().begin(),
PHI.getValues().end());
794 if (isMemoryInst(NewInsts[0]))
797 SinkingInstructionCandidate Cand;
798 Cand.NumInstructions = ++InstNum;
799 Cand.NumMemoryInsts = MemoryInstNum;
800 Cand.NumBlocks = ActivePreds.size();
801 Cand.NumPHIs = NeededPHIs.size();
815 auto *
T =
B->getTerminator();
816 if (isa<BranchInst>(
T) || isa<SwitchInst>(
T))
821 if (Preds.size() < 2)
829 unsigned NumOrigPreds = Preds.size();
835 LockstepReverseIterator LRI(Preds);
837 unsigned InstNum = 0, MemoryInstNum = 0;
838 ModelledPHISet NeededPHIs;
840 analyzeInitialPHIs(BBEnd, NeededPHIs, PHIContents);
841 unsigned NumOrigPHIs = NeededPHIs.size();
843 while (LRI.isValid()) {
844 auto Cand = analyzeInstructionForSinking(LRI, InstNum, MemoryInstNum,
845 NeededPHIs, PHIContents);
848 Cand->calculateCost(NumOrigPHIs, Preds.size());
856 <<
" " <<
C <<
"\n";);
859 if (Candidates.empty() || Candidates.front().Cost <= 0)
861 auto C = Candidates.front();
865 if (
C.Blocks.size() < NumOrigPreds) {
876 for (
unsigned I = 0;
I <
C.NumInstructions; ++
I)
879 return C.NumInstructions;
901 assert(!
Op->getType()->isTokenTy() &&
"Can't PHI tokens!");
905 for (
auto *
I : Insts)
917 for (
auto *
I : Insts)
923 for (
auto *
I : Insts)
925 I->replaceAllUsesWith(I0);
928 foldPointlessPHINodes(BBEnd);
931 for (
auto *
I : Insts)
933 I->eraseFromParent();
935 NumRemoved += Insts.size() - 1;
This file defines the BumpPtrAllocator interface.
Atomic ordering constants.
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static void clear(coro::Shape &Shape)
This file defines the DenseMap class.
This file defines the DenseSet and SmallDenseSet classes.
std::optional< std::vector< StOtherPiece > > Other
DenseMap< Block *, BlockRelaxAux > Blocks
The header file for the GVN pass that contains expression handling classes.
This file provides the interface for LLVM's Global Value Numbering pass which eliminates fully redund...
This is the interface for a simple mod/ref and alias analysis over globals.
This file provides various utilities for inspecting and working with the control flow graph in LLVM I...
This defines the Use class.
static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset, uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs, llvm::Error &Err)
A Lookup helper functions.
uint64_t IntrinsicInst * II
This header defines various interfaces for pass management in LLVM.
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static void sinkLastInstruction(ArrayRef< BasicBlock * > Blocks)
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
A container for analyses that lazily runs them and caches their results.
Recycle small arrays allocated from a BumpPtrAllocator.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
MutableArrayRef< T > copy(Allocator &A)
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
iterator_range< const_phi_iterator > phis() const
Returns a range that iterates over the phis in the basic block.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Allocate memory in an ever growing pool, as if by bump-pointer.
bool onlyReadsMemory(unsigned OpNo) const
This class represents a function call, abstracting a target machine's calling convention.
This class is the base class for the comparison instructions.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
This class represents an Operation in the Expression.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Implements a dense probed hash-table based set.
void allocateOperands(RecyclerType &Recycler, BumpPtrAllocator &Allocator)
void op_push_back(Value *Arg)
hash_code getHashValue() const override
iterator_range< op_iterator > operands()
unsigned getOpcode() const
LLVM_DUMP_METHOD void dump() const
void setOpcode(unsigned opcode)
void print(raw_ostream &OS) const
bool isSameOperationAs(const Instruction *I, unsigned flags=0) const LLVM_READONLY
This function determines if the specified instruction executes the same operation as the current one.
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
void andIRFlags(const Value *V)
Logical 'and' of any supported wrapping, exact, and fast-math flags of V and this instruction.
const Instruction * getPrevNonDebugInstruction(bool SkipPseudoOp=false) const
Return a pointer to the previous non-debug instruction in the same basic block as 'this',...
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
void applyMergedLocation(DILocation *LocA, DILocation *LocB)
Merge 2 debug locations and apply it to the Instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Recycler - This class manages a linked-list of deallocated nodes and facilitates reusing deallocated ...
void clear(AllocatorType &Allocator)
clear - Release all the tracked allocations to the allocator.
bool remove(const value_type &X)
Remove an item from the set vector.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool contains(const key_type &key) const
Check if the SetVector contains the given key.
This instruction constructs a fixed permutation of two input vectors.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
A SetVector that performs no allocations if smaller than a certain size.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
iterator erase(const_iterator CI)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
static Twine utohexstr(const uint64_t &Val)
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
void printAsOperand(raw_ostream &O, bool PrintType=true, const Module *M=nullptr) const
Print the name of this Value out to the specified raw_ostream.
An opaque object representing a hash code.
const ParentTy * getParent() const
self_iterator getIterator()
This class implements an extremely fast bulk output stream that can only output to a stream.
ArchKind & operator--(ArchKind &Kind)
@ C
The default llvm calling convention, compatible with C.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
void stable_sort(R &&Range)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
bool isEqual(const GCNRPTracker::LiveRegSet &S1, const GCNRPTracker::LiveRegSet &S2)
APInt operator*(APInt a, uint64_t RHS)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
bool isStrongerThanUnordered(AtomicOrdering AO)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
bool operator>(int64_t V1, const APSInt &V2)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
void combineMetadataForCSE(Instruction *K, const Instruction *J, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
BasicBlock * SplitBlockPredecessors(BasicBlock *BB, ArrayRef< BasicBlock * > Preds, const char *Suffix, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, bool PreserveLCSSA=false)
This method introduces at least one new basic block into the function and moves some of the predecess...
bool canReplaceOperandWithVariable(const Instruction *I, unsigned OpIdx)
Given an instruction, is it legal to set operand OpIdx to a non-constant value?
auto max_element(R &&Range)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
OutputIt copy(R &&Range, OutputIt Out)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
auto predecessors(const MachineBasicBlock *BB)
bool pred_empty(const BasicBlock *BB)
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer lists are equal or the list.
hash_code hash_combine(const Ts &...args)
Combine values into a single hash_code.
hash_code hash_combine_range(InputIteratorT first, InputIteratorT last)
Compute a hash_code for a sequence of values.
An information struct used to provide DenseMap with the various necessary components for a given valu...
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
Function object to check whether the second component of a container supported by std::get (like std:...