LLVM 20.0.0git
MoveAutoInit.cpp
Go to the documentation of this file.
1//===-- MoveAutoInit.cpp - move auto-init inst closer to their use site----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass moves instruction maked as auto-init closer to the basic block that
10// use it, eventually removing it from some control path of the function.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/STLExtras.h"
16#include "llvm/ADT/Statistic.h"
20#include "llvm/IR/DebugInfo.h"
21#include "llvm/IR/Dominators.h"
22#include "llvm/IR/IRBuilder.h"
28
29using namespace llvm;
30
31#define DEBUG_TYPE "move-auto-init"
32
33STATISTIC(NumMoved, "Number of instructions moved");
34
36 "move-auto-init-threshold", cl::Hidden, cl::init(128),
37 cl::desc("Maximum instructions to analyze per moved initialization"));
38
39static bool hasAutoInitMetadata(const Instruction &I) {
40 return I.hasMetadata(LLVMContext::MD_annotation) &&
41 any_of(I.getMetadata(LLVMContext::MD_annotation)->operands(),
42 [](const MDOperand &Op) { return Op.equalsStr("auto-init"); });
43}
44
45static std::optional<MemoryLocation> writeToAlloca(const Instruction &I) {
47 if (auto *MI = dyn_cast<MemIntrinsic>(&I))
49 else if (auto *SI = dyn_cast<StoreInst>(&I))
51 else
52 return std::nullopt;
53
54 if (isa<AllocaInst>(getUnderlyingObject(ML.Ptr)))
55 return ML;
56 else
57 return {};
58}
59
60/// Finds a BasicBlock in the CFG where instruction `I` can be moved to while
61/// not changing the Memory SSA ordering and being guarded by at least one
62/// condition.
64 DominatorTree &DT, MemorySSA &MSSA) {
65 BasicBlock *CurrentDominator = nullptr;
66 MemoryUseOrDef &IMA = *MSSA.getMemoryAccess(I);
67 BatchAAResults AA(MSSA.getAA());
68
70
71 auto AsMemoryAccess = [](User *U) { return cast<MemoryAccess>(U); };
72 SmallVector<MemoryAccess *> WorkList(map_range(IMA.users(), AsMemoryAccess));
73
74 while (!WorkList.empty()) {
75 MemoryAccess *MA = WorkList.pop_back_val();
76 if (!Visited.insert(MA).second)
77 continue;
78
79 if (Visited.size() > MoveAutoInitThreshold)
80 return nullptr;
81
82 bool FoundClobberingUser = false;
83 if (auto *M = dyn_cast<MemoryUseOrDef>(MA)) {
84 Instruction *MI = M->getMemoryInst();
85
86 // If this memory instruction may not clobber `I`, we can skip it.
87 // LifetimeEnd is a valid user, but we do not want it in the user
88 // dominator.
89 if (AA.getModRefInfo(MI, ML) != ModRefInfo::NoModRef &&
90 !MI->isLifetimeStartOrEnd() && MI != I) {
91 FoundClobberingUser = true;
92 CurrentDominator = CurrentDominator
93 ? DT.findNearestCommonDominator(CurrentDominator,
94 MI->getParent())
95 : MI->getParent();
96 }
97 }
98 if (!FoundClobberingUser) {
99 auto UsersAsMemoryAccesses = map_range(MA->users(), AsMemoryAccess);
100 append_range(WorkList, UsersAsMemoryAccesses);
101 }
102 }
103 return CurrentDominator;
104}
105
107 BasicBlock &EntryBB = F.getEntryBlock();
109
110 //
111 // Compute movable instructions.
112 //
113 for (Instruction &I : EntryBB) {
115 continue;
116
117 std::optional<MemoryLocation> ML = writeToAlloca(I);
118 if (!ML)
119 continue;
120
121 if (I.isVolatile())
122 continue;
123
124 BasicBlock *UsersDominator = usersDominator(ML.value(), &I, DT, MSSA);
125 if (!UsersDominator)
126 continue;
127
128 if (UsersDominator == &EntryBB)
129 continue;
130
131 // Traverse the CFG to detect cycles `UsersDominator` would be part of.
132 SmallPtrSet<BasicBlock *, 8> TransitiveSuccessors;
133 SmallVector<BasicBlock *> WorkList(successors(UsersDominator));
134 bool HasCycle = false;
135 while (!WorkList.empty()) {
136 BasicBlock *CurrBB = WorkList.pop_back_val();
137 if (CurrBB == UsersDominator)
138 // No early exit because we want to compute the full set of transitive
139 // successors.
140 HasCycle = true;
141 for (BasicBlock *Successor : successors(CurrBB)) {
142 if (!TransitiveSuccessors.insert(Successor).second)
143 continue;
144 WorkList.push_back(Successor);
145 }
146 }
147
148 // Don't insert if that could create multiple execution of I,
149 // but we can insert it in the non back-edge predecessors, if it exists.
150 if (HasCycle) {
151 BasicBlock *UsersDominatorHead = UsersDominator;
152 while (BasicBlock *UniquePredecessor =
153 UsersDominatorHead->getUniquePredecessor())
154 UsersDominatorHead = UniquePredecessor;
155
156 if (UsersDominatorHead == &EntryBB)
157 continue;
158
159 BasicBlock *DominatingPredecessor = nullptr;
160 for (BasicBlock *Pred : predecessors(UsersDominatorHead)) {
161 // If one of the predecessor of the dominator also transitively is a
162 // successor, moving to the dominator would do the inverse of loop
163 // hoisting, and we don't want that.
164 if (TransitiveSuccessors.count(Pred))
165 continue;
166
167 if (!DT.isReachableFromEntry(Pred))
168 continue;
169
170 DominatingPredecessor =
171 DominatingPredecessor
172 ? DT.findNearestCommonDominator(DominatingPredecessor, Pred)
173 : Pred;
174 }
175
176 if (!DominatingPredecessor || DominatingPredecessor == &EntryBB)
177 continue;
178
179 UsersDominator = DominatingPredecessor;
180 }
181
182 // CatchSwitchInst blocks can only have one instruction, so they are not
183 // good candidates for insertion.
184 while (isa<CatchSwitchInst>(UsersDominator->getFirstNonPHI())) {
185 for (BasicBlock *Pred : predecessors(UsersDominator))
186 if (DT.isReachableFromEntry(Pred))
187 UsersDominator = DT.findNearestCommonDominator(UsersDominator, Pred);
188 }
189
190 // We finally found a place where I can be moved while not introducing extra
191 // execution, and guarded by at least one condition.
192 if (UsersDominator != &EntryBB)
193 JobList.emplace_back(&I, UsersDominator);
194 }
195
196 //
197 // Perform the actual substitution.
198 //
199 if (JobList.empty())
200 return false;
201
202 MemorySSAUpdater MSSAU(&MSSA);
203
204 // Reverse insertion to respect relative order between instructions:
205 // if two instructions are moved from the same BB to the same BB, we insert
206 // the second one in the front, then the first on top of it.
207 for (auto &Job : reverse(JobList)) {
208 Job.first->moveBefore(*Job.second, Job.second->getFirstInsertionPt());
209 MSSAU.moveToPlace(MSSA.getMemoryAccess(Job.first), Job.first->getParent(),
210 MemorySSA::InsertionPlace::Beginning);
211 }
212
213 if (VerifyMemorySSA)
214 MSSA.verifyMemorySSA();
215
216 NumMoved += JobList.size();
217
218 return true;
219}
220
223
224 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
225 auto &MSSA = AM.getResult<MemorySSAAnalysis>(F).getMSSA();
226 if (!runMoveAutoInit(F, DT, MSSA))
227 return PreservedAnalyses::all();
228
233 return PA;
234}
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
static cl::opt< unsigned > MoveAutoInitThreshold("move-auto-init-threshold", cl::Hidden, cl::init(128), cl::desc("Maximum instructions to analyze per moved initialization"))
static bool runMoveAutoInit(Function &F, DominatorTree &DT, MemorySSA &MSSA)
static BasicBlock * usersDominator(const MemoryLocation &ML, Instruction *I, DominatorTree &DT, MemorySSA &MSSA)
Finds a BasicBlock in the CFG where instruction I can be moved to while not changing the Memory SSA o...
static bool hasAutoInitMetadata(const Instruction &I)
static std::optional< MemoryLocation > writeToAlloca(const Instruction &I)
This file contains some templates that are useful if you are working with the STL at all.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:405
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
const Instruction * getFirstNonPHI() const
Returns a pointer to the first instruction in this block that is not a PHINode instruction.
Definition: BasicBlock.cpp:367
const BasicBlock * getUniquePredecessor() const
Return the predecessor of this block if it has a unique predecessor block.
Definition: BasicBlock.cpp:467
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition: Analysis.h:72
This class represents an Operation in the Expression.
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:162
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:321
Instruction * findNearestCommonDominator(Instruction *I1, Instruction *I2) const
Find the nearest instruction I that dominates both I1 and I2, in the sense that a result produced bef...
Definition: Dominators.cpp:344
Tracking metadata reference owned by Metadata.
Definition: Metadata.h:891
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
An analysis that produces MemorySSA for a function.
Definition: MemorySSA.h:928
void moveToPlace(MemoryUseOrDef *What, BasicBlock *BB, MemorySSA::InsertionPlace Where)
Encapsulates MemorySSA, including all data associated with memory accesses.
Definition: MemorySSA.h:701
AliasAnalysis & getAA()
Definition: MemorySSA.h:799
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
Definition: MemorySSA.cpp:1905
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
Definition: MemorySSA.h:719
Class that has the common methods + fields of memory uses/defs.
Definition: MemorySSA.h:253
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
void preserveSet()
Mark an analysis set as preserved.
Definition: Analysis.h:146
void preserve()
Mark an analysis as preserved.
Definition: Analysis.h:131
size_type size() const
Definition: SmallPtrSet.h:96
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:436
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:368
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:503
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:951
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
iterator_range< user_iterator > users()
Definition: Value.h:421
const ParentTy * getParent() const
Definition: ilist_node.h:32
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto successors(const MachineBasicBlock *BB)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
auto map_range(ContainerTy &&C, FuncTy F)
Definition: STLExtras.h:377
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
bool VerifyMemorySSA
Enables verification of MemorySSA.
Definition: MemorySSA.cpp:84
auto predecessors(const MachineBasicBlock *BB)