LLVM 23.0.0git
MaterializationUtils.cpp
Go to the documentation of this file.
1//===- MaterializationUtils.cpp - Builds and manipulates coroutine frame
2//-------------===//
3//
4// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5// See https://llvm.org/LICENSE.txt for license information.
6// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7//
8//===----------------------------------------------------------------------===//
9// This file contains classes used to materialize insts after suspends points.
10//===----------------------------------------------------------------------===//
11
13#include "CoroInternal.h"
15#include "llvm/IR/Dominators.h"
17#include "llvm/IR/Instruction.h"
21#include <deque>
22
23using namespace llvm;
24
25using namespace coro;
26
27// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
28// "coro-frame", which results in leaner debug spew.
29#define DEBUG_TYPE "coro-suspend-crossing"
30
31namespace {
32
33// RematGraph is used to construct a DAG for rematerializable instructions
34// When the constructor is invoked with a candidate instruction (which is
35// materializable) it builds a DAG of materializable instructions from that
36// point.
37// Typically, for each instruction identified as re-materializable across a
38// suspend point, a RematGraph will be created.
39struct RematGraph {
40 // Each RematNode in the graph contains the edges to instructions providing
41 // operands in the current node.
42 struct RematNode {
45 RematNode() = default;
46 RematNode(Instruction *V) : Node(V) {}
47 };
48
49 RematNode *EntryNode;
50 using RematNodeMap =
52 RematNodeMap Remats;
53 const std::function<bool(Instruction &)> &MaterializableCallback;
54 SuspendCrossingInfo &Checker;
55
56 RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
58 : MaterializableCallback(MaterializableCallback), Checker(Checker) {
59 std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I);
60 EntryNode = FirstNode.get();
61 std::deque<std::unique_ptr<RematNode>> WorkList;
62 addNode(std::move(FirstNode), WorkList, cast<User>(I));
63 while (WorkList.size()) {
64 std::unique_ptr<RematNode> N = std::move(WorkList.front());
65 WorkList.pop_front();
66 addNode(std::move(N), WorkList, cast<User>(I));
67 }
68 }
69
70 void addNode(std::unique_ptr<RematNode> NUPtr,
71 std::deque<std::unique_ptr<RematNode>> &WorkList,
72 User *FirstUse) {
73 RematNode *N = NUPtr.get();
74 auto [It, Inserted] = Remats.try_emplace(N->Node);
75 if (!Inserted)
76 return;
77
78 // We haven't see this node yet - add to the list
79 It->second = std::move(NUPtr);
80 for (auto &Def : N->Node->operands()) {
82 if (!D || !MaterializableCallback(*D) ||
83 !Checker.isDefinitionAcrossSuspend(*D, FirstUse))
84 continue;
85
86 if (auto It = Remats.find(D); It != Remats.end()) {
87 // Already have this in the graph
88 N->Operands.push_back(It->second.get());
89 continue;
90 }
91
92 bool NoMatch = true;
93 for (auto &I : WorkList) {
94 if (I->Node == D) {
95 NoMatch = false;
96 N->Operands.push_back(I.get());
97 break;
98 }
99 }
100 if (NoMatch) {
101 // Create a new node
102 std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D);
103 N->Operands.push_back(ChildNode.get());
104 WorkList.push_back(std::move(ChildNode));
105 }
106 }
107 }
108
109#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
110 static void dumpBasicBlockLabel(const BasicBlock *BB,
111 ModuleSlotTracker &MST) {
112 if (BB->hasName()) {
113 dbgs() << BB->getName();
114 return;
115 }
116
117 dbgs() << MST.getLocalSlot(BB);
118 }
119
120 void dump() const {
121 BasicBlock *BB = EntryNode->Node->getParent();
122 Function *F = BB->getParent();
123
124 ModuleSlotTracker MST(F->getParent());
126
127 dbgs() << "Entry (";
128 dumpBasicBlockLabel(BB, MST);
129 dbgs() << ") : " << *EntryNode->Node << "\n";
130 for (auto &E : Remats) {
131 dbgs() << *(E.first) << "\n";
132 for (RematNode *U : E.second->Operands)
133 dbgs() << " " << *U->Node << "\n";
134 }
135 }
136#endif
137};
138
139} // namespace
140
141template <> struct llvm::GraphTraits<RematGraph *> {
142 using NodeRef = RematGraph::RematNode *;
143 using ChildIteratorType = RematGraph::RematNode **;
144
145 static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
147 return N->Operands.begin();
148 }
149 static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
150};
151
152// For each instruction identified as materializable across the suspend point,
153// and its associated DAG of other rematerializable instructions,
154// recreate the DAG of instructions after the suspend point.
156 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8>
157 &AllRemats) {
158 // This has to be done in 2 phases
159 // Do the remats and record the required defs to be replaced in the
160 // original use instructions
161 // Once all the remats are complete, replace the uses in the final
162 // instructions with the new defs
163 typedef struct {
165 Instruction *Def;
166 Instruction *Remat;
167 } ProcessNode;
168
169 SmallVector<ProcessNode> FinalInstructionsToProcess;
170
171 for (const auto &E : AllRemats) {
172 Instruction *Use = E.first;
173 Instruction *CurrentMaterialization = nullptr;
174 RematGraph *RG = E.second.get();
176 SmallVector<Instruction *> InstructionsToProcess;
177
178 // If the target use is actually a suspend instruction then we have to
179 // insert the remats into the end of the predecessor (there should only be
180 // one). This is so that suspend blocks always have the suspend instruction
181 // as the first instruction.
182 BasicBlock::iterator InsertPoint = Use->getParent()->getFirstInsertionPt();
184 BasicBlock *SuspendPredecessorBlock =
185 Use->getParent()->getSinglePredecessor();
186 assert(SuspendPredecessorBlock && "malformed coro suspend instruction");
187 InsertPoint = SuspendPredecessorBlock->getTerminator()->getIterator();
188 }
189
190 // Note: skip the first instruction as this is the actual use that we're
191 // rematerializing everything for.
192 auto I = RPOT.begin();
193 ++I;
194 for (; I != RPOT.end(); ++I) {
195 Instruction *D = (*I)->Node;
196 CurrentMaterialization = D->clone();
197 CurrentMaterialization->setName(D->getName());
198 CurrentMaterialization->insertBefore(InsertPoint);
199 InsertPoint = CurrentMaterialization->getIterator();
200
201 // Replace all uses of Def in the instructions being added as part of this
202 // rematerialization group
203 for (auto &I : InstructionsToProcess)
204 I->replaceUsesOfWith(D, CurrentMaterialization);
205
206 // Don't replace the final use at this point as this can cause problems
207 // for other materializations. Instead, for any final use that uses a
208 // define that's being rematerialized, record the replace values
209 for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i)
210 if (Use->getOperand(i) == D) // Is this operand pointing to oldval?
211 FinalInstructionsToProcess.push_back(
212 {Use, D, CurrentMaterialization});
213
214 InstructionsToProcess.push_back(CurrentMaterialization);
215 }
216 }
217
218 // Finally, replace the uses with the defines that we've just rematerialized
219 for (auto &R : FinalInstructionsToProcess) {
220 if (auto *PN = dyn_cast<PHINode>(R.Use)) {
221 assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
222 "values in the PHINode");
223 PN->replaceAllUsesWith(R.Remat);
224 PN->eraseFromParent();
225 continue;
226 }
227 R.Use->replaceUsesOfWith(R.Def, R.Remat);
228 }
229}
230
231/// Default materializable callback
232// Check for instructions that we can recreate on resume as opposed to spill
233// the result into a coroutine frame.
235 if (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
237 isa<SelectInst>(&V))
238 return true;
239
240 if (auto *II = dyn_cast<IntrinsicInst>(&V)) {
241 switch (II->getIntrinsicID()) {
242 // Floating-point unary math
243 case Intrinsic::fabs:
244 case Intrinsic::sqrt:
245 case Intrinsic::sin:
246 case Intrinsic::cos:
247 case Intrinsic::exp2:
248 case Intrinsic::log2:
249 // Floating-point binary math
250 case Intrinsic::minnum:
251 case Intrinsic::maxnum:
252 case Intrinsic::minimum:
253 case Intrinsic::maximum:
254 case Intrinsic::copysign:
255 case Intrinsic::ldexp:
256 // Floating-point conversion
257 case Intrinsic::fptoui_sat:
258 case Intrinsic::fptosi_sat:
259 case Intrinsic::is_fpclass:
260 // Rounding
261 case Intrinsic::floor:
262 case Intrinsic::ceil:
263 case Intrinsic::trunc:
264 case Intrinsic::rint:
265 case Intrinsic::nearbyint:
266 case Intrinsic::round:
267 case Intrinsic::roundeven:
268 // Integer bit ops
269 case Intrinsic::ctpop:
270 case Intrinsic::ctlz:
271 case Intrinsic::cttz:
272 case Intrinsic::bitreverse:
273 // Integer saturating arithmetic
274 case Intrinsic::sadd_sat:
275 case Intrinsic::uadd_sat:
276 case Intrinsic::ssub_sat:
277 case Intrinsic::usub_sat:
278 // Integer min and max
279 case Intrinsic::smax:
280 case Intrinsic::smin:
281 case Intrinsic::umax:
282 case Intrinsic::umin:
283 return true;
284 default:
285 break;
286 }
287 }
288
289 return false;
290}
291
295
296#ifndef NDEBUG
297static void dumpRemats(
298 StringRef Title,
299 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
300 dbgs() << "------------- " << Title << "--------------\n";
301 for (const auto &E : RM) {
302 E.second->dump();
303 dbgs() << "--\n";
304 }
305}
306#endif
307
309 Function &F, SuspendCrossingInfo &Checker,
310 std::function<bool(Instruction &)> IsMaterializable) {
311 if (F.hasOptNone())
312 return;
313
314 coro::SpillInfo Spills;
315
316 // See if there are materializable instructions across suspend points
317 // We record these as the starting point to also identify materializable
318 // defs of uses in these operations
319 for (Instruction &I : instructions(F)) {
320 if (!IsMaterializable(I))
321 continue;
322 for (User *U : I.users())
323 if (Checker.isDefinitionAcrossSuspend(I, U))
324 Spills[&I].push_back(cast<Instruction>(U));
325 }
326
327 // Process each of the identified rematerializable instructions
328 // and add predecessor instructions that can also be rematerialized.
329 // This is actually a graph of instructions since we could potentially
330 // have multiple uses of a def in the set of predecessor instructions.
331 // The approach here is to maintain a graph of instructions for each bottom
332 // level instruction - where we have a unique set of instructions (nodes)
333 // and edges between them. We then walk the graph in reverse post-dominator
334 // order to insert them past the suspend point, but ensure that ordering is
335 // correct. We also rely on CSE removing duplicate defs for remats of
336 // different instructions with a def in common (rather than maintaining more
337 // complex graphs for each suspend point)
338
339 // We can do this by adding new nodes to the list for each suspend
340 // point. Then using standard GraphTraits to give a reverse post-order
341 // traversal when we insert the nodes after the suspend
343 for (auto &E : Spills) {
344 for (Instruction *U : E.second) {
345 // Don't process a user twice (this can happen if the instruction uses
346 // more than one rematerializable def)
347 auto [It, Inserted] = AllRemats.try_emplace(U);
348 if (!Inserted)
349 continue;
350
351 // Constructor creates the whole RematGraph for the given Use
352 auto RematUPtr =
353 std::make_unique<RematGraph>(IsMaterializable, U, Checker);
354
355 LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
356 ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
357 for (auto I = RPOT.begin(); I != RPOT.end();
358 ++I) { (*I)->Node->dump(); } dbgs()
359 << "\n";);
360
361 It->second = std::move(RematUPtr);
362 }
363 }
364
365 // Rewrite materializable instructions to be materialized at the use
366 // point.
367 LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
369}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
Expand Atomic instructions
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
static void rewriteMaterializableInstructions(const SmallMapVector< Instruction *, std::unique_ptr< RematGraph >, 8 > &AllRemats)
static void dumpRemats(StringRef Title, const SmallMapVector< Instruction *, std::unique_ptr< RematGraph >, 8 > &RM)
uint64_t IntrinsicInst * II
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
#define LLVM_DEBUG(...)
Definition Debug.h:119
LLVM Basic Block Representation.
Definition BasicBlock.h:62
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Definition BasicBlock.h:237
LLVM_ABI void insertBefore(InstListType::iterator InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified position.
std::pair< iterator, bool > try_emplace(const KeyT &Key, Ts &&...Args)
Definition MapVector.h:118
Manage lifetime of a slot tracker for printing IR.
int getLocalSlot(const Value *V)
Return the slot number of the specified local value.
void incorporateFunction(const Function &F)
Incorporate the given function.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const
A Use represents the edge between a Value definition and its users.
Definition Use.h:35
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
self_iterator getIterator()
Definition ilist_node.h:123
SmallMapVector< Value *, SmallVector< Instruction *, 2 >, 8 > SpillInfo
Definition SpillUtils.h:18
bool defaultMaterializable(Instruction &V)
Default materializable callback.
LLVM_ABI bool isTriviallyMaterializable(Instruction &I)
LLVM_ABI void doRematerializations(Function &F, SuspendCrossingInfo &Checker, std::function< bool(Instruction &)> IsMaterializable)
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static void dumpBasicBlockLabel(const BasicBlock *BB, ModuleSlotTracker &MST)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
#define N
static ChildIteratorType child_end(NodeRef N)
static NodeRef getEntryNode(RematGraph *G)
static ChildIteratorType child_begin(NodeRef N)
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:334