CoroFrame.cpp
1//===- CoroFrame.cpp - Builds and manipulates coroutine frame -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8// This file contains classes used to discover if for a particular value
9// there is a path from its definition to a use that crosses a suspend point.
10//
11// Using the information discovered we form a Coroutine Frame structure to
12// contain those values. All uses of those values are replaced with appropriate
13// GEP + load from the coroutine frame. At the point of the definition we spill
14// the value into the coroutine frame.
15//===----------------------------------------------------------------------===//
16
17#include "CoroInternal.h"
18#include "llvm/ADT/BitVector.h"
20#include "llvm/ADT/ScopeExit.h"
24#include "llvm/Config/llvm-config.h"
25#include "llvm/IR/CFG.h"
26#include "llvm/IR/DIBuilder.h"
27#include "llvm/IR/DebugInfo.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/IRBuilder.h"
32#include "llvm/Support/Debug.h"
40#include <algorithm>
41#include <deque>
42#include <optional>
43
44using namespace llvm;
45
46// The "coro-suspend-crossing" flag is very noisy. There is another debug type,
47// "coro-frame", which results in leaner debug spew.
48#define DEBUG_TYPE "coro-suspend-crossing"
49
50static const unsigned SmallVectorThreshold = 32;
51
52// Provides two-way mapping between the blocks and numbers.
53namespace {
54class BlockToIndexMapping {
55 SmallVector<BasicBlock *, SmallVectorThreshold> V;
56
57public:
58 size_t size() const { return V.size(); }
59
60 BlockToIndexMapping(Function &F) {
61 for (BasicBlock &BB : F)
62 V.push_back(&BB);
63 llvm::sort(V);
64 }
65
66 size_t blockToIndex(BasicBlock *BB) const {
67 auto *I = llvm::lower_bound(V, BB);
68 assert(I != V.end() && *I == BB && "BasicBlockNumbering: Unknown block");
69 return I - V.begin();
70 }
71
72 BasicBlock *indexToBlock(unsigned Index) const { return V[Index]; }
73};
74} // end anonymous namespace
75
76// The SuspendCrossingInfo maintains data that allows us to answer the question
77// of whether, given two BasicBlocks A and B, there is a path from A to B that
78// passes through a suspend point.
79//
80// For every basic block 'i' it maintains a BlockData that consists of:
81// Consumes: a bit vector which contains a set of indices of blocks that can
82// reach block 'i'. A block can trivially reach itself.
83// Kills: a bit vector which contains a set of indices of blocks that can
84// reach block 'i' but there is a path crossing a suspend point
85// not repeating 'i' (path to 'i' without cycles containing 'i').
86// Suspend: a boolean indicating whether block 'i' contains a suspend point.
87// End: a boolean indicating whether block 'i' contains a coro.end intrinsic.
88// KillLoop: There is a path from 'i' to 'i' not otherwise repeating 'i' that
89// crosses a suspend point.
90//
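// Illustrative example (not part of the original source): for a CFG
//   A -> S -> B, where S contains a suspend point,
// the fixed-point propagation below yields Consumes[B] = {A, S, B} and
// Kills[B] = {A, S}: any value defined in A or S and used in B lives across
// the suspend in S and therefore must be spilled to the coroutine frame.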
91namespace {
92class SuspendCrossingInfo {
93 BlockToIndexMapping Mapping;
94
95 struct BlockData {
96 BitVector Consumes;
97 BitVector Kills;
98 bool Suspend = false;
99 bool End = false;
100 bool KillLoop = false;
101 bool Changed = false;
102 };
103 SmallVector<BlockData, SmallVectorThreshold> Block;
104
105 iterator_range<pred_iterator> predecessors(BlockData const &BD) const {
106 BasicBlock *BB = Mapping.indexToBlock(&BD - &Block[0]);
107 return llvm::predecessors(BB);
108 }
109
110 BlockData &getBlockData(BasicBlock *BB) {
111 return Block[Mapping.blockToIndex(BB)];
112 }
113
114 /// Compute the BlockData for the current function in one iteration.
115 /// Returns whether the BlockData changes in this iteration.
116 /// Initialize - Whether this is the first iteration; if so, we can optimize
117 /// the initial case a little bit by switching the loop manually.
118 template <bool Initialize = false> bool computeBlockData();
119
120public:
121#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
122 void dump() const;
123 void dump(StringRef Label, BitVector const &BV) const;
124#endif
125
126 SuspendCrossingInfo(Function &F, coro::Shape &Shape);
127
128 /// Returns true if there is a path from \p From to \p To crossing a suspend
129 /// point without crossing \p From a 2nd time.
130 bool hasPathCrossingSuspendPoint(BasicBlock *From, BasicBlock *To) const {
131 size_t const FromIndex = Mapping.blockToIndex(From);
132 size_t const ToIndex = Mapping.blockToIndex(To);
133 bool const Result = Block[ToIndex].Kills[FromIndex];
134 LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
135 << " answer is " << Result << "\n");
136 return Result;
137 }
138
139 /// Returns true if there is a path from \p From to \p To crossing a suspend
140 /// point without crossing \p From a 2nd time. If \p From is the same as \p To
141 /// this will also check if there is a looping path crossing a suspend point.
142 bool hasPathOrLoopCrossingSuspendPoint(BasicBlock *From,
143 BasicBlock *To) const {
144 size_t const FromIndex = Mapping.blockToIndex(From);
145 size_t const ToIndex = Mapping.blockToIndex(To);
146 bool Result = Block[ToIndex].Kills[FromIndex] ||
147 (From == To && Block[ToIndex].KillLoop);
148 LLVM_DEBUG(dbgs() << From->getName() << " => " << To->getName()
149 << " answer is " << Result << " (path or loop)\n");
150 return Result;
151 }
152
153 bool isDefinitionAcrossSuspend(BasicBlock *DefBB, User *U) const {
154 auto *I = cast<Instruction>(U);
155
156 // We rewrote PHINodes, so that only the ones with exactly one incoming
157 // value need to be analyzed.
158 if (auto *PN = dyn_cast<PHINode>(I))
159 if (PN->getNumIncomingValues() > 1)
160 return false;
161
162 BasicBlock *UseBB = I->getParent();
163
164 // As a special case, treat uses by an llvm.coro.suspend.retcon or an
165 // llvm.coro.suspend.async as if they were uses in the suspend's single
166 // predecessor: the uses conceptually occur before the suspend.
167 if (isa<CoroSuspendRetconInst>(I) || isa<CoroSuspendAsyncInst>(I)) {
168 UseBB = UseBB->getSinglePredecessor();
169 assert(UseBB && "should have split coro.suspend into its own block");
170 }
171
172 return hasPathCrossingSuspendPoint(DefBB, UseBB);
173 }
174
175 bool isDefinitionAcrossSuspend(Argument &A, User *U) const {
176 return isDefinitionAcrossSuspend(&A.getParent()->getEntryBlock(), U);
177 }
178
179 bool isDefinitionAcrossSuspend(Instruction &I, User *U) const {
180 auto *DefBB = I.getParent();
181
182 // As a special case, treat values produced by an llvm.coro.suspend.*
183 // as if they were defined in the single successor: the uses
184 // conceptually occur after the suspend.
185 if (isa<AnyCoroSuspendInst>(I)) {
186 DefBB = DefBB->getSingleSuccessor();
187 assert(DefBB && "should have split coro.suspend into its own block");
188 }
189
190 return isDefinitionAcrossSuspend(DefBB, U);
191 }
192
193 bool isDefinitionAcrossSuspend(Value &V, User *U) const {
194 if (auto *Arg = dyn_cast<Argument>(&V))
195 return isDefinitionAcrossSuspend(*Arg, U);
196 if (auto *Inst = dyn_cast<Instruction>(&V))
197 return isDefinitionAcrossSuspend(*Inst, U);
198
200 "Coroutine could only collect Argument and Instruction now.");
201 }
202};
203} // end anonymous namespace
204
205#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
206LLVM_DUMP_METHOD void SuspendCrossingInfo::dump(StringRef Label,
207 BitVector const &BV) const {
208 dbgs() << Label << ":";
209 for (size_t I = 0, N = BV.size(); I < N; ++I)
210 if (BV[I])
211 dbgs() << " " << Mapping.indexToBlock(I)->getName();
212 dbgs() << "\n";
213}
214
215LLVM_DUMP_METHOD void SuspendCrossingInfo::dump() const {
216 for (size_t I = 0, N = Block.size(); I < N; ++I) {
217 BasicBlock *const B = Mapping.indexToBlock(I);
218 dbgs() << B->getName() << ":\n";
219 dump(" Consumes", Block[I].Consumes);
220 dump(" Kills", Block[I].Kills);
221 }
222 dbgs() << "\n";
223}
224#endif
225
226template <bool Initialize> bool SuspendCrossingInfo::computeBlockData() {
227 const size_t N = Mapping.size();
228 bool Changed = false;
229
230 for (size_t I = 0; I < N; ++I) {
231 auto &B = Block[I];
232
233 // We don't need to check the predecessors on the initializing iteration.
234 if constexpr (!Initialize)
235 // If none of the predecessors of the current block changed,
236 // then the BlockData for the current block cannot change either.
237 if (all_of(predecessors(B), [this](BasicBlock *BB) {
238 return !Block[Mapping.blockToIndex(BB)].Changed;
239 })) {
240 B.Changed = false;
241 continue;
242 }
243
244 // Save the Consumes and Kills bitsets so that it is easy to see
245 // whether anything changed after propagation.
246 auto SavedConsumes = B.Consumes;
247 auto SavedKills = B.Kills;
248
249 for (BasicBlock *PI : predecessors(B)) {
250 auto PrevNo = Mapping.blockToIndex(PI);
251 auto &P = Block[PrevNo];
252
253 // Propagate Kills and Consumes from predecessors into B.
254 B.Consumes |= P.Consumes;
255 B.Kills |= P.Kills;
256
257 // If block P is a suspend block, it should propagate kills into block
258 // B for every block P consumes.
259 if (P.Suspend)
260 B.Kills |= P.Consumes;
261 }
262
263 if (B.Suspend) {
264 // If block S is a suspend block, it should kill all of the blocks it
265 // consumes.
266 B.Kills |= B.Consumes;
267 } else if (B.End) {
268 // If block B is an end block, it should not propagate kills as the
269 // blocks following coro.end() are reached during initial invocation
270 // of the coroutine while all the data are still available on the
271 // stack or in the registers.
272 B.Kills.reset();
273 } else {
274 // This is reached when block B is neither a suspend nor a coro.end block;
275 // we need to make sure that B is not in its own kill set.
276 B.KillLoop |= B.Kills[I];
277 B.Kills.reset(I);
278 }
279
280 if constexpr (!Initialize) {
281 B.Changed = (B.Kills != SavedKills) || (B.Consumes != SavedConsumes);
282 Changed |= B.Changed;
283 }
284 }
285
286 if constexpr (Initialize)
287 return true;
288
289 return Changed;
290}
291
292SuspendCrossingInfo::SuspendCrossingInfo(Function &F, coro::Shape &Shape)
293 : Mapping(F) {
294 const size_t N = Mapping.size();
295 Block.resize(N);
296
297 // Initialize every block so that it consumes itself
298 for (size_t I = 0; I < N; ++I) {
299 auto &B = Block[I];
300 B.Consumes.resize(N);
301 B.Kills.resize(N);
302 B.Consumes.set(I);
303 B.Changed = true;
304 }
305
306 // Mark all CoroEnd Blocks. We do not propagate Kills beyond coro.ends as
307 // the code beyond coro.end is reachable during initial invocation of the
308 // coroutine.
309 for (auto *CE : Shape.CoroEnds)
310 getBlockData(CE->getParent()).End = true;
311
312 // Mark all suspend blocks and indicate that they kill everything they
313 // consume. Note, that crossing coro.save also requires a spill, as any code
314 // between coro.save and coro.suspend may resume the coroutine and all of the
315 // state needs to be saved by that time.
316 auto markSuspendBlock = [&](IntrinsicInst *BarrierInst) {
317 BasicBlock *SuspendBlock = BarrierInst->getParent();
318 auto &B = getBlockData(SuspendBlock);
319 B.Suspend = true;
320 B.Kills |= B.Consumes;
321 };
322 for (auto *CSI : Shape.CoroSuspends) {
323 markSuspendBlock(CSI);
324 if (auto *Save = CSI->getCoroSave())
325 markSuspendBlock(Save);
326 }
327
328 computeBlockData</*Initialize=*/true>();
329
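 // Iterate to a fixed point: keep propagating Consumes/Kills until no
 // block's BlockData changes in a full pass.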
330 while (computeBlockData())
331 ;
332
333 LLVM_DEBUG(dump());
334}
335
336namespace {
337
338// RematGraph is used to construct a DAG for rematerializable instructions.
339// When the constructor is invoked with a candidate instruction (which is
340// materializable) it builds a DAG of materializable instructions from that
341// point.
342// Typically, for each instruction identified as re-materializable across a
343// suspend point, a RematGraph will be created.
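// Illustrative example (not part of the original source): if %sum = add i32
// %a, 1 is rematerializable and its use crosses a suspend point, the graph
// rooted at %sum will also pull in %a as an operand node when %a is itself a
// materializable instruction whose definition crosses the same suspend point.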
344struct RematGraph {
345 // Each RematNode in the graph contains the edges to instructions providing
346 // operands in the current node.
347 struct RematNode {
348 Instruction *Node;
349 SmallVector<RematNode *> Operands;
350 RematNode() = default;
351 RematNode(Instruction *V) : Node(V) {}
352 };
353
354 RematNode *EntryNode;
355 using RematNodeMap =
356 SmallMapVector<Instruction *, std::unique_ptr<RematNode>, 8>;
357 RematNodeMap Remats;
358 const std::function<bool(Instruction &)> &MaterializableCallback;
359 SuspendCrossingInfo &Checker;
360
361 RematGraph(const std::function<bool(Instruction &)> &MaterializableCallback,
362 Instruction *I, SuspendCrossingInfo &Checker)
363 : MaterializableCallback(MaterializableCallback), Checker(Checker) {
364 std::unique_ptr<RematNode> FirstNode = std::make_unique<RematNode>(I);
365 EntryNode = FirstNode.get();
366 std::deque<std::unique_ptr<RematNode>> WorkList;
367 addNode(std::move(FirstNode), WorkList, cast<User>(I));
368 while (WorkList.size()) {
369 std::unique_ptr<RematNode> N = std::move(WorkList.front());
370 WorkList.pop_front();
371 addNode(std::move(N), WorkList, cast<User>(I));
372 }
373 }
374
375 void addNode(std::unique_ptr<RematNode> NUPtr,
376 std::deque<std::unique_ptr<RematNode>> &WorkList,
377 User *FirstUse) {
378 RematNode *N = NUPtr.get();
379 if (Remats.count(N->Node))
380 return;
381
382 // We haven't seen this node yet - add it to the list.
383 Remats[N->Node] = std::move(NUPtr);
384 for (auto &Def : N->Node->operands()) {
385 Instruction *D = dyn_cast<Instruction>(Def.get());
386 if (!D || !MaterializableCallback(*D) ||
387 !Checker.isDefinitionAcrossSuspend(*D, FirstUse))
388 continue;
389
390 if (Remats.count(D)) {
391 // Already have this in the graph
392 N->Operands.push_back(Remats[D].get());
393 continue;
394 }
395
396 bool NoMatch = true;
397 for (auto &I : WorkList) {
398 if (I->Node == D) {
399 NoMatch = false;
400 N->Operands.push_back(I.get());
401 break;
402 }
403 }
404 if (NoMatch) {
405 // Create a new node
406 std::unique_ptr<RematNode> ChildNode = std::make_unique<RematNode>(D);
407 N->Operands.push_back(ChildNode.get());
408 WorkList.push_back(std::move(ChildNode));
409 }
410 }
411 }
412
413#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
414 void dump() const {
415 dbgs() << "Entry (";
416 if (EntryNode->Node->getParent()->hasName())
417 dbgs() << EntryNode->Node->getParent()->getName();
418 else
419 EntryNode->Node->getParent()->printAsOperand(dbgs(), false);
420 dbgs() << ") : " << *EntryNode->Node << "\n";
421 for (auto &E : Remats) {
422 dbgs() << *(E.first) << "\n";
423 for (RematNode *U : E.second->Operands)
424 dbgs() << " " << *U->Node << "\n";
425 }
426 }
427#endif
428};
429} // end anonymous namespace
430
431namespace llvm {
432
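// Specializing GraphTraits lets LLVM's generic graph algorithms (e.g.
// post-order traversal) walk a RematGraph, treating each node's operand
// nodes as its children.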
433template <> struct GraphTraits<RematGraph *> {
434 using NodeRef = RematGraph::RematNode *;
435 using ChildIteratorType = RematGraph::RematNode **;
436
437 static NodeRef getEntryNode(RematGraph *G) { return G->EntryNode; }
438 static ChildIteratorType child_begin(NodeRef N) {
439 return N->Operands.begin();
440 }
441 static ChildIteratorType child_end(NodeRef N) { return N->Operands.end(); }
442};
443
444} // end namespace llvm
445
446#undef DEBUG_TYPE // "coro-suspend-crossing"
447#define DEBUG_TYPE "coro-frame"
448
449namespace {
450class FrameTypeBuilder;
451// Mapping from the to-be-spilled value to all the users that need reload.
452using SpillInfo = SmallMapVector<Value *, SmallVector<Instruction *, 2>, 8>;
453struct AllocaInfo {
454 AllocaInst *Alloca;
455 DenseMap<Instruction *, std::optional<APInt>> Aliases;
456 bool MayWriteBeforeCoroBegin;
457 AllocaInfo(AllocaInst *Alloca,
458 DenseMap<Instruction *, std::optional<APInt>> Aliases,
459 bool MayWriteBeforeCoroBegin)
460 : Alloca(Alloca), Aliases(std::move(Aliases)),
461 MayWriteBeforeCoroBegin(MayWriteBeforeCoroBegin) {}
462};
463struct FrameDataInfo {
464 // All the values (that are not allocas) that need to be spilled to the
465 // frame.
466 SpillInfo Spills;
467 // Allocas contains all values defined as allocas that need to live in the
468 // frame.
469 SmallVector<AllocaInfo, 8> Allocas;
470
471 SmallVector<Value *, 8> getAllDefs() const {
472 SmallVector<Value *, 8> Defs;
473 for (const auto &P : Spills)
474 Defs.push_back(P.first);
475 for (const auto &A : Allocas)
476 Defs.push_back(A.Alloca);
477 return Defs;
478 }
479
480 uint32_t getFieldIndex(Value *V) const {
481 auto Itr = FieldIndexMap.find(V);
482 assert(Itr != FieldIndexMap.end() &&
483 "Value does not have a frame field index");
484 return Itr->second;
485 }
486
487 void setFieldIndex(Value *V, uint32_t Index) {
488 assert((LayoutIndexUpdateStarted || FieldIndexMap.count(V) == 0) &&
489 "Cannot set the index for the same field twice.");
490 FieldIndexMap[V] = Index;
491 }
492
493 Align getAlign(Value *V) const {
494 auto Iter = FieldAlignMap.find(V);
495 assert(Iter != FieldAlignMap.end());
496 return Iter->second;
497 }
498
499 void setAlign(Value *V, Align AL) {
500 assert(FieldAlignMap.count(V) == 0);
501 FieldAlignMap.insert({V, AL});
502 }
503
504 uint64_t getDynamicAlign(Value *V) const {
505 auto Iter = FieldDynamicAlignMap.find(V);
506 assert(Iter != FieldDynamicAlignMap.end());
507 return Iter->second;
508 }
509
510 void setDynamicAlign(Value *V, uint64_t Align) {
511 assert(FieldDynamicAlignMap.count(V) == 0);
512 FieldDynamicAlignMap.insert({V, Align});
513 }
514
515 uint64_t getOffset(Value *V) const {
516 auto Iter = FieldOffsetMap.find(V);
517 assert(Iter != FieldOffsetMap.end());
518 return Iter->second;
519 }
520
521 void setOffset(Value *V, uint64_t Offset) {
522 assert(FieldOffsetMap.count(V) == 0);
523 FieldOffsetMap.insert({V, Offset});
524 }
525
526 // Remap the index of every field in the frame, using the final layout index.
527 void updateLayoutIndex(FrameTypeBuilder &B);
528
529private:
530 // LayoutIndexUpdateStarted is used to avoid updating the index of any field
531 // twice by mistake.
532 bool LayoutIndexUpdateStarted = false;
533 // Map from values to their slot indexes on the frame. They will be first set
534 // with their original insertion field index. After the frame is built, their
535 // indexes will be updated into the final layout index.
536 DenseMap<Value *, uint32_t> FieldIndexMap;
537 // Map from values to their alignment on the frame. They would be set after
538 // the frame is built.
539 DenseMap<Value *, Align> FieldAlignMap;
540 DenseMap<Value *, uint64_t> FieldDynamicAlignMap;
541 // Map from values to their offset on the frame. They would be set after
542 // the frame is built.
543 DenseMap<Value *, uint64_t> FieldOffsetMap;
544};
545} // namespace
546
547#ifndef NDEBUG
548static void dumpSpills(StringRef Title, const SpillInfo &Spills) {
549 dbgs() << "------------- " << Title << "--------------\n";
550 for (const auto &E : Spills) {
551 E.first->dump();
552 dbgs() << " user: ";
553 for (auto *I : E.second)
554 I->dump();
555 }
556}
557static void dumpRemats(
558 StringRef Title,
559 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> &RM) {
560 dbgs() << "------------- " << Title << "--------------\n";
561 for (const auto &E : RM) {
562 E.second->dump();
563 dbgs() << "--\n";
564 }
565}
566
567static void dumpAllocas(const SmallVectorImpl<AllocaInfo> &Allocas) {
568 dbgs() << "------------- Allocas --------------\n";
569 for (const auto &A : Allocas) {
570 A.Alloca->dump();
571 }
572}
573#endif
574
575namespace {
576using FieldIDType = size_t;
577// We cannot rely solely on natural alignment of a type when building a
578// coroutine frame and if the alignment specified on the Alloca instruction
579// differs from the natural alignment of the alloca type we will need to insert
580// padding.
581class FrameTypeBuilder {
582private:
583 struct Field {
584 uint64_t Size;
585 uint64_t Offset;
586 Type *Ty;
587 FieldIDType LayoutFieldIndex;
588 Align Alignment;
589 Align TyAlignment;
590 uint64_t DynamicAlignBuffer;
591 };
592
593 const DataLayout &DL;
594 LLVMContext &Context;
595 uint64_t StructSize = 0;
596 Align StructAlign;
597 bool IsFinished = false;
598
599 std::optional<Align> MaxFrameAlignment;
600
601 SmallVector<Field, 8> Fields;
602 DenseMap<Value*, unsigned> FieldIndexByKey;
603
604public:
605 FrameTypeBuilder(LLVMContext &Context, const DataLayout &DL,
606 std::optional<Align> MaxFrameAlignment)
607 : DL(DL), Context(Context), MaxFrameAlignment(MaxFrameAlignment) {}
608
609 /// Add a field to this structure for the storage of an `alloca`
610 /// instruction.
611 [[nodiscard]] FieldIDType addFieldForAlloca(AllocaInst *AI,
612 bool IsHeader = false) {
613 Type *Ty = AI->getAllocatedType();
614
615 // Make an array type if this is a static array allocation.
616 if (AI->isArrayAllocation()) {
617 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
618 Ty = ArrayType::get(Ty, CI->getValue().getZExtValue());
619 else
620 report_fatal_error("Coroutines cannot handle non static allocas yet");
621 }
622
623 return addField(Ty, AI->getAlign(), IsHeader);
624 }
625
626 /// We want to put the allocas whose lifetime-ranges do not overlap
627 /// into one slot of the coroutine frame.
628 /// Consider the example at: https://bugs.llvm.org/show_bug.cgi?id=45566
629 ///
630 /// cppcoro::task<void> alternative_paths(bool cond) {
631 /// if (cond) {
632 /// big_structure a;
633 /// process(a);
634 /// co_await something();
635 /// } else {
636 /// big_structure b;
637 /// process2(b);
638 /// co_await something();
639 /// }
640 /// }
641 ///
642 /// We want to put variable a and variable b in the same slot to
643 /// reduce the size of coroutine frame.
644 ///
645 /// This function uses the StackLifetime algorithm to partition the AllocaInsts
646 /// in Spills into non-overlapping sets, in order to put allocas in the same
647 /// non-overlapping set into the same slot in the coroutine frame. It then adds
648 /// a field for the allocas in each non-overlapping set, using the largest type
649 /// as the field type.
650 ///
651 /// Side Effects: Because we sort the allocas, the order of allocas in the
652 /// frame may differ from the order in the source code.
653 void addFieldForAllocas(const Function &F, FrameDataInfo &FrameData,
654 coro::Shape &Shape);
655
656 /// Add a field to this structure.
657 [[nodiscard]] FieldIDType addField(Type *Ty, MaybeAlign MaybeFieldAlignment,
658 bool IsHeader = false,
659 bool IsSpillOfValue = false) {
660 assert(!IsFinished && "adding fields to a finished builder");
661 assert(Ty && "must provide a type for a field");
662
663 // The field size is always the alloc size of the type.
664 uint64_t FieldSize = DL.getTypeAllocSize(Ty);
665
666 // For an alloca with size=0, we don't need to add a field and they
667 // can just point to any index in the frame. Use index 0.
668 if (FieldSize == 0) {
669 return 0;
670 }
671
672 // The field alignment might not be the type alignment, but we need
673 // to remember the type alignment anyway to build the type.
674 // If we are spilling values we don't need to worry about ABI alignment
675 // concerns.
676 Align ABIAlign = DL.getABITypeAlign(Ty);
677 Align TyAlignment = ABIAlign;
678 if (IsSpillOfValue && MaxFrameAlignment && *MaxFrameAlignment < ABIAlign)
679 TyAlignment = *MaxFrameAlignment;
680 Align FieldAlignment = MaybeFieldAlignment.value_or(TyAlignment);
681
682 // The field alignment could be bigger than the max frame alignment; in that case
683 // we request additional storage to be able to dynamically align the
684 // pointer.
685 uint64_t DynamicAlignBuffer = 0;
686 if (MaxFrameAlignment && (FieldAlignment > *MaxFrameAlignment)) {
687 DynamicAlignBuffer =
688 offsetToAlignment(MaxFrameAlignment->value(), FieldAlignment);
689 FieldAlignment = *MaxFrameAlignment;
690 FieldSize = FieldSize + DynamicAlignBuffer;
691 }
692
693 // Lay out header fields immediately.
694 uint64_t Offset;
695 if (IsHeader) {
696 Offset = alignTo(StructSize, FieldAlignment);
697 StructSize = Offset + FieldSize;
698
699 // Everything else has a flexible offset.
700 } else {
701 Offset = OptimizedStructLayoutField::FlexibleOffset;
702 }
703
704 Fields.push_back({FieldSize, Offset, Ty, 0, FieldAlignment, TyAlignment,
705 DynamicAlignBuffer});
706 return Fields.size() - 1;
707 }
708
709 /// Finish the layout and set the body on the given type.
710 void finish(StructType *Ty);
711
712 uint64_t getStructSize() const {
713 assert(IsFinished && "not yet finished!");
714 return StructSize;
715 }
716
717 Align getStructAlign() const {
718 assert(IsFinished && "not yet finished!");
719 return StructAlign;
720 }
721
722 FieldIDType getLayoutFieldIndex(FieldIDType Id) const {
723 assert(IsFinished && "not yet finished!");
724 return Fields[Id].LayoutFieldIndex;
725 }
726
727 Field getLayoutField(FieldIDType Id) const {
728 assert(IsFinished && "not yet finished!");
729 return Fields[Id];
730 }
731};
732} // namespace
733
734void FrameDataInfo::updateLayoutIndex(FrameTypeBuilder &B) {
735 auto Updater = [&](Value *I) {
736 auto Field = B.getLayoutField(getFieldIndex(I));
737 setFieldIndex(I, Field.LayoutFieldIndex);
738 setAlign(I, Field.Alignment);
739 uint64_t dynamicAlign =
740 Field.DynamicAlignBuffer
741 ? Field.DynamicAlignBuffer + Field.Alignment.value()
742 : 0;
743 setDynamicAlign(I, dynamicAlign);
744 setOffset(I, Field.Offset);
745 };
746 LayoutIndexUpdateStarted = true;
747 for (auto &S : Spills)
748 Updater(S.first);
749 for (const auto &A : Allocas)
750 Updater(A.Alloca);
751 LayoutIndexUpdateStarted = false;
752}
753
754void FrameTypeBuilder::addFieldForAllocas(const Function &F,
755 FrameDataInfo &FrameData,
756 coro::Shape &Shape) {
757 using AllocaSetType = SmallVector<AllocaInst *, 4>;
758 SmallVector<AllocaSetType, 4> NonOverlapedAllocas;
759
760 // We need to add field for allocas at the end of this function.
761 auto AddFieldForAllocasAtExit = make_scope_exit([&]() {
762 for (auto AllocaList : NonOverlapedAllocas) {
763 auto *LargestAI = *AllocaList.begin();
764 FieldIDType Id = addFieldForAlloca(LargestAI);
765 for (auto *Alloca : AllocaList)
766 FrameData.setFieldIndex(Alloca, Id);
767 }
768 });
769
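 // Without frame optimization, keep every alloca in its own slot and skip
 // the lifetime-based merging below.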
770 if (!Shape.OptimizeFrame) {
771 for (const auto &A : FrameData.Allocas) {
772 AllocaInst *Alloca = A.Alloca;
773 NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
774 }
775 return;
776 }
777
778 // Because there are paths from the lifetime.start to coro.end
779 // for each alloca, the live ranges of all the allocas overlap in
780 // the blocks that contain coro.end and in their successor blocks.
781 // So we choose to skip these blocks when we calculate the live range
782 // for each alloca. That should be reasonable, since there shouldn't be uses
783 // in these blocks and the coroutine frame shouldn't be used outside the
784 // coroutine body.
785 //
786 // Note that the user of coro.suspend may not be a SwitchInst. However, this
787 // case seems too complex to handle. And it is harmless to skip these
788 // patterns since it just prevents putting the allocas into the same
789 // slot.
790 DenseMap<SwitchInst *, BasicBlock *> DefaultSuspendDest;
791 for (auto *CoroSuspendInst : Shape.CoroSuspends) {
792 for (auto *U : CoroSuspendInst->users()) {
793 if (auto *ConstSWI = dyn_cast<SwitchInst>(U)) {
794 auto *SWI = const_cast<SwitchInst *>(ConstSWI);
795 DefaultSuspendDest[SWI] = SWI->getDefaultDest();
796 SWI->setDefaultDest(SWI->getSuccessor(1));
797 }
798 }
799 }
800
801 auto ExtractAllocas = [&]() {
802 AllocaSetType Allocas;
803 Allocas.reserve(FrameData.Allocas.size());
804 for (const auto &A : FrameData.Allocas)
805 Allocas.push_back(A.Alloca);
806 return Allocas;
807 };
808 StackLifetime StackLifetimeAnalyzer(F, ExtractAllocas(),
809 StackLifetime::LivenessType::May);
810 StackLifetimeAnalyzer.run();
811 auto IsAllocaInferenre = [&](const AllocaInst *AI1, const AllocaInst *AI2) {
812 return StackLifetimeAnalyzer.getLiveRange(AI1).overlaps(
813 StackLifetimeAnalyzer.getLiveRange(AI2));
814 };
815 auto GetAllocaSize = [&](const AllocaInfo &A) {
816 std::optional<TypeSize> RetSize = A.Alloca->getAllocationSize(DL);
817 assert(RetSize && "Variable Length Arrays (VLA) are not supported.\n");
818 assert(!RetSize->isScalable() && "Scalable vectors are not yet supported");
819 return RetSize->getFixedValue();
820 };
821 // Put larger allocas in the front, so the larger allocas have higher
822 // priority to merge, which can potentially save more space. Also, each
823 // AllocaSet will be ordered, so we can easily get the largest Alloca in an
824 // AllocaSet.
825 sort(FrameData.Allocas, [&](const auto &Iter1, const auto &Iter2) {
826 return GetAllocaSize(Iter1) > GetAllocaSize(Iter2);
827 });
828 for (const auto &A : FrameData.Allocas) {
829 AllocaInst *Alloca = A.Alloca;
830 bool Merged = false;
831 // Try to find a NonOverlappedAllocaSet that the Alloca does not
832 // interfere with. If we find one, insert the alloca into that
833 // NonOverlappedAllocaSet.
834 for (auto &AllocaSet : NonOverlapedAllocas) {
835 assert(!AllocaSet.empty() && "Processing Alloca Set is not empty.\n");
836 bool NoInference = none_of(AllocaSet, [&](auto Iter) {
837 return IsAllocaInferenre(Alloca, Iter);
838 });
839 // If the alignment of A is a multiple of the alignment of B, the address
840 // of A should satisfy the alignment requirement of B.
841 //
842 // There may be other, more fine-grained strategies for handling the alignment
843 // information during the merging process, but they seem hard to implement
844 // and of little benefit.
845 bool Alignable = [&]() -> bool {
846 auto *LargestAlloca = *AllocaSet.begin();
847 return LargestAlloca->getAlign().value() % Alloca->getAlign().value() ==
848 0;
849 }();
850 bool CouldMerge = NoInference && Alignable;
851 if (!CouldMerge)
852 continue;
853 AllocaSet.push_back(Alloca);
854 Merged = true;
855 break;
856 }
857 if (!Merged) {
858 NonOverlapedAllocas.emplace_back(AllocaSetType(1, Alloca));
859 }
860 }
861 // Restore the default target destination for each switch statement
862 // we redirected above.
863 for (auto SwitchAndDefaultDest : DefaultSuspendDest) {
864 SwitchInst *SWI = SwitchAndDefaultDest.first;
865 BasicBlock *DestBB = SwitchAndDefaultDest.second;
866 SWI->setDefaultDest(DestBB);
867 }
868 // This Debug Info could tell us which allocas are merged into one slot.
869 LLVM_DEBUG(for (auto &AllocaSet
870 : NonOverlapedAllocas) {
871 if (AllocaSet.size() > 1) {
872 dbgs() << "In Function:" << F.getName() << "\n";
873 dbgs() << "Find Union Set "
874 << "\n";
875 dbgs() << "\tAllocas are \n";
876 for (auto Alloca : AllocaSet)
877 dbgs() << "\t\t" << *Alloca << "\n";
878 }
879 });
880}
881
882void FrameTypeBuilder::finish(StructType *Ty) {
883 assert(!IsFinished && "already finished!");
884
885 // Prepare the optimal-layout field array.
886 // The Id in the layout field is a pointer to our Field for it.
887 SmallVector<OptimizedStructLayoutField, 8> LayoutFields;
888 LayoutFields.reserve(Fields.size());
889 for (auto &Field : Fields) {
890 LayoutFields.emplace_back(&Field, Field.Size, Field.Alignment,
891 Field.Offset);
892 }
893
894 // Perform layout.
895 auto SizeAndAlign = performOptimizedStructLayout(LayoutFields);
896 StructSize = SizeAndAlign.first;
897 StructAlign = SizeAndAlign.second;
898
899 auto getField = [](const OptimizedStructLayoutField &LayoutField) -> Field & {
900 return *static_cast<Field *>(const_cast<void*>(LayoutField.Id));
901 };
902
903 // We need to produce a packed struct type if there's a field whose
904 // assigned offset isn't a multiple of its natural type alignment.
905 bool Packed = [&] {
906 for (auto &LayoutField : LayoutFields) {
907 auto &F = getField(LayoutField);
908 if (!isAligned(F.TyAlignment, LayoutField.Offset))
909 return true;
910 }
911 return false;
912 }();
913
914 // Build the struct body.
915 SmallVector<Type*, 16> FieldTypes;
916 FieldTypes.reserve(LayoutFields.size() * 3 / 2);
917 uint64_t LastOffset = 0;
918 for (auto &LayoutField : LayoutFields) {
919 auto &F = getField(LayoutField);
920
921 auto Offset = LayoutField.Offset;
922
923 // Add a padding field if there's a padding gap and we're either
924 // building a packed struct or the padding gap is more than we'd
925 // get from aligning to the field type's natural alignment.
926 assert(Offset >= LastOffset);
927 if (Offset != LastOffset) {
928 if (Packed || alignTo(LastOffset, F.TyAlignment) != Offset)
929 FieldTypes.push_back(ArrayType::get(Type::getInt8Ty(Context),
930 Offset - LastOffset));
931 }
932
933 F.Offset = Offset;
934 F.LayoutFieldIndex = FieldTypes.size();
935
936 FieldTypes.push_back(F.Ty);
937 if (F.DynamicAlignBuffer) {
938 FieldTypes.push_back(
939 ArrayType::get(Type::getInt8Ty(Context), F.DynamicAlignBuffer));
940 }
941 LastOffset = Offset + F.Size;
942 }
943
944 Ty->setBody(FieldTypes, Packed);
945
946#ifndef NDEBUG
947 // Check that the IR layout matches the offsets we expect.
948 auto Layout = DL.getStructLayout(Ty);
949 for (auto &F : Fields) {
950 assert(Ty->getElementType(F.LayoutFieldIndex) == F.Ty);
951 assert(Layout->getElementOffset(F.LayoutFieldIndex) == F.Offset);
952 }
953#endif
954
955 IsFinished = true;
956}
957
958static void cacheDIVar(FrameDataInfo &FrameData,
959 DenseMap<Value *, DILocalVariable *> &DIVarCache) {
960 for (auto *V : FrameData.getAllDefs()) {
961 if (DIVarCache.contains(V))
962 continue;
963
964 auto DDIs = FindDbgDeclareUses(V);
965 auto *I = llvm::find_if(DDIs, [](DbgDeclareInst *DDI) {
966 return DDI->getExpression()->getNumElements() == 0;
967 });
968 if (I != DDIs.end())
969 DIVarCache.insert({V, (*I)->getVariable()});
970 }
971}
972
973/// Create a name for Type. It uses MDString to store the newly created
974/// string, to avoid a memory leak.
975static StringRef solveTypeName(Type *Ty) {
976 if (Ty->isIntegerTy()) {
977 // The longest name in common use may be '__int_128', which has 9 characters.
978 SmallString<16> Buffer;
979 raw_svector_ostream OS(Buffer);
980 OS << "__int_" << cast<IntegerType>(Ty)->getBitWidth();
981 auto *MDName = MDString::get(Ty->getContext(), OS.str());
982 return MDName->getString();
983 }
984
985 if (Ty->isFloatingPointTy()) {
986 if (Ty->isFloatTy())
987 return "__float_";
988 if (Ty->isDoubleTy())
989 return "__double_";
990 return "__floating_type_";
991 }
992
993 if (auto *PtrTy = dyn_cast<PointerType>(Ty)) {
994 if (PtrTy->isOpaque())
995 return "PointerType";
996 Type *PointeeTy = PtrTy->getNonOpaquePointerElementType();
997 auto Name = solveTypeName(PointeeTy);
998 if (Name == "UnknownType")
999 return "PointerType";
1000 SmallString<16> Buffer;
1001 Twine(Name + "_Ptr").toStringRef(Buffer);
1002 auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
1003 return MDName->getString();
1004 }
1005
1006 if (Ty->isStructTy()) {
1007 if (!cast<StructType>(Ty)->hasName())
1008 return "__LiteralStructType_";
1009
1010 auto Name = Ty->getStructName();
1011
1012 SmallString<16> Buffer(Name);
1013 for (auto &Iter : Buffer)
1014 if (Iter == '.' || Iter == ':')
1015 Iter = '_';
1016 auto *MDName = MDString::get(Ty->getContext(), Buffer.str());
1017 return MDName->getString();
1018 }
1019
1020 return "UnknownType";
1021}
1022
1023static DIType *solveDIType(DIBuilder &Builder, Type *Ty,
1024 const DataLayout &Layout, DIScope *Scope,
1025 unsigned LineNum,
1026 DenseMap<Type *, DIType *> &DITypeCache) {
1027 if (DIType *DT = DITypeCache.lookup(Ty))
1028 return DT;
1029
1030 StringRef Name = solveTypeName(Ty);
1031
1032 DIType *RetType = nullptr;
1033
1034 if (Ty->isIntegerTy()) {
1035 auto BitWidth = cast<IntegerType>(Ty)->getBitWidth();
1036 RetType = Builder.createBasicType(Name, BitWidth, dwarf::DW_ATE_signed,
1037 llvm::DINode::FlagArtificial);
1038 } else if (Ty->isFloatingPointTy()) {
1039 RetType = Builder.createBasicType(Name, Layout.getTypeSizeInBits(Ty),
1040 dwarf::DW_ATE_float,
1041 llvm::DINode::FlagArtificial);
1042 } else if (Ty->isPointerTy()) {
1043 // Construct a PointerType that points to null (aka void *) instead of
1044 // exploring the pointee type, to avoid an infinite search problem. For
1045 // example, we would be in trouble if we traverse recursively:
1046 //
1047 // struct Node {
1048 // Node* ptr;
1049 // };
1050 RetType =
1051 Builder.createPointerType(nullptr, Layout.getTypeSizeInBits(Ty),
1052 Layout.getABITypeAlign(Ty).value() * CHAR_BIT,
1053 /*DWARFAddressSpace=*/std::nullopt, Name);
1054 } else if (Ty->isStructTy()) {
1055 auto *DIStruct = Builder.createStructType(
1056 Scope, Name, Scope->getFile(), LineNum, Layout.getTypeSizeInBits(Ty),
1057 Layout.getPrefTypeAlign(Ty).value() * CHAR_BIT,
1058 llvm::DINode::FlagArtificial, nullptr, llvm::DINodeArray());
1059
1060 auto *StructTy = cast<StructType>(Ty);
1061 SmallVector<Metadata *, 16> Elements;
1062 for (unsigned I = 0; I < StructTy->getNumElements(); I++) {
1063 DIType *DITy = solveDIType(Builder, StructTy->getElementType(I), Layout,
1064 Scope, LineNum, DITypeCache);
1065 assert(DITy);
1066 Elements.push_back(Builder.createMemberType(
1067 Scope, DITy->getName(), Scope->getFile(), LineNum,
1068 DITy->getSizeInBits(), DITy->getAlignInBits(),
1069 Layout.getStructLayout(StructTy)->getElementOffsetInBits(I),
1070 llvm::DINode::FlagArtificial, DITy));
1071 }
1072
1073 Builder.replaceArrays(DIStruct, Builder.getOrCreateArray(Elements));
1074
1075 RetType = DIStruct;
1076 } else {
1077 LLVM_DEBUG(dbgs() << "Unresolved Type: " << *Ty << "\n");
1078 TypeSize Size = Layout.getTypeSizeInBits(Ty);
1079 auto *CharSizeType = Builder.createBasicType(
1080 Name, 8, dwarf::DW_ATE_unsigned_char, llvm::DINode::FlagArtificial);
1081
1082 if (Size <= 8)
1083 RetType = CharSizeType;
1084 else {
1085 if (Size % 8 != 0)
1086 Size = TypeSize::Fixed(Size + 8 - (Size % 8));
1087
1088 RetType = Builder.createArrayType(
1089 Size, Layout.getPrefTypeAlign(Ty).value(), CharSizeType,
1090 Builder.getOrCreateArray(Builder.getOrCreateSubrange(0, Size / 8)));
1091 }
1092 }
1093
1094 DITypeCache.insert({Ty, RetType});
1095 return RetType;
1096}
1097
1098/// Build artificial debug info for C++ coroutine frames to allow users to
1099/// inspect the contents of the frame directly
1100///
1101/// Create Debug information for coroutine frame with debug name "__coro_frame".
1102/// The debug information for the fields of coroutine frame is constructed from
1103/// the following way:
1104/// 1. For all the value in the Frame, we search the use of dbg.declare to find
1105/// the corresponding debug variables for the value. If we can find the
1106/// debug variable, we can get full and accurate debug information.
1107/// 2. If we can't get debug information in step 1, we can only try to
1108/// build the DIType from the LLVM type. We do this in solveDIType. We only
1109/// handle integer, float, double, and struct types for now.
1110static void buildFrameDebugInfo(Function &F, coro::Shape &Shape,
1111 FrameDataInfo &FrameData) {
1112 DISubprogram *DIS = F.getSubprogram();
1113 // If there is no DISubprogram for F, it implies the function was not
1114 // compiled with debug info, so we don't need to generate debug info for
1115 // the frame either.
1116 if (!DIS || !DIS->getUnit() ||
1117 !dwarf::isCPlusPlus(
1118 (dwarf::SourceLanguage)DIS->getUnit()->getSourceLanguage()))
1119 return;
1120
1121 assert(Shape.ABI == coro::ABI::Switch &&
1122 "We could only build debug infomation for C++ coroutine now.\n");
1123
1124 DIBuilder DBuilder(*F.getParent(), /*AllowUnresolved*/ false);
1125
1126 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
1127 assert(PromiseAlloca &&
1128 "Coroutine with switch ABI should own Promise alloca");
1129
1130 TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(PromiseAlloca);
1131 if (DIs.empty())
1132 return;
1133
1134 DbgDeclareInst *PromiseDDI = DIs.front();
1135 DILocalVariable *PromiseDIVariable = PromiseDDI->getVariable();
1136 DILocalScope *PromiseDIScope = PromiseDIVariable->getScope();
1137 DIFile *DFile = PromiseDIScope->getFile();
1138 DILocation *DILoc = PromiseDDI->getDebugLoc().get();
1139 unsigned LineNum = PromiseDIVariable->getLine();
1140
1141 DICompositeType *FrameDITy = DBuilder.createStructType(
1142 DIS->getUnit(), Twine(F.getName() + ".coro_frame_ty").str(),
1143 DFile, LineNum, Shape.FrameSize * 8,
1144 Shape.FrameAlign.value() * 8, llvm::DINode::FlagArtificial, nullptr,
1145 llvm::DINodeArray());
1146 StructType *FrameTy = Shape.FrameTy;
1147 SmallVector<Metadata *, 16> Elements;
1148 DataLayout Layout = F.getParent()->getDataLayout();
1149 DenseMap<Value *, DILocalVariable *> DIVarCache;
1150
1151 cacheDIVar(FrameData, DIVarCache);
1152
1153 unsigned ResumeIndex = coro::Shape::SwitchFieldIndex::Resume;
1154 unsigned DestroyIndex = coro::Shape::SwitchFieldIndex::Destroy;
1155 unsigned IndexIndex = Shape.SwitchLowering.IndexField;
1156
1157 DenseMap<unsigned, StringRef> NameCache;
1158 NameCache.insert({ResumeIndex, "__resume_fn"});
1159 NameCache.insert({DestroyIndex, "__destroy_fn"});
1160 NameCache.insert({IndexIndex, "__coro_index"});
1161
1162 Type *ResumeFnTy = FrameTy->getElementType(ResumeIndex),
1163 *DestroyFnTy = FrameTy->getElementType(DestroyIndex),
1164 *IndexTy = FrameTy->getElementType(IndexIndex);
1165
1167 TyCache.insert(
1168 {ResumeIndex, DBuilder.createPointerType(
1169 nullptr, Layout.getTypeSizeInBits(ResumeFnTy))});
1170 TyCache.insert(
1171 {DestroyIndex, DBuilder.createPointerType(
1172 nullptr, Layout.getTypeSizeInBits(DestroyFnTy))});
1173
1174 /// FIXME: If we fill the field `SizeInBits` with the actual size of
1175 /// __coro_index in bits, then __coro_index wouldn't show in the debugger.
1176 TyCache.insert({IndexIndex, DBuilder.createBasicType(
1177 "__coro_index",
1178 (Layout.getTypeSizeInBits(IndexTy) < 8)
1179 ? 8
1180 : Layout.getTypeSizeInBits(IndexTy),
1181 dwarf::DW_ATE_unsigned_char)});
1182
1183 for (auto *V : FrameData.getAllDefs()) {
1184 if (!DIVarCache.contains(V))
1185 continue;
1186
1187 auto Index = FrameData.getFieldIndex(V);
1188
1189 NameCache.insert({Index, DIVarCache[V]->getName()});
1190 TyCache.insert({Index, DIVarCache[V]->getType()});
1191 }
1192
1193 // Cache from index to (Align, Offset) pair
1194 DenseMap<unsigned, std::pair<unsigned, unsigned>> OffsetCache;
1195 // The Align and Offset of the Resume function and Destroy function are fixed.
1196 OffsetCache.insert({ResumeIndex, {8, 0}});
1197 OffsetCache.insert({DestroyIndex, {8, 8}});
1198 OffsetCache.insert(
1199 {IndexIndex,
1200 {Shape.SwitchLowering.IndexAlign, Shape.SwitchLowering.IndexOffset}});
1201
1202 for (auto *V : FrameData.getAllDefs()) {
1203 auto Index = FrameData.getFieldIndex(V);
1204
1205 OffsetCache.insert(
1206 {Index, {FrameData.getAlign(V).value(), FrameData.getOffset(V)}});
1207 }
1208
1209 DenseMap<Type *, DIType *> DITypeCache;
1210 // This counter is used to avoid duplicate type names. E.g., there could be
1211 // many i32 and i64 types in one coroutine, so we use names like i32_0 and
1212 // i32_1 to distinguish them, since it makes no sense for the names of the
1213 // fields to conflict with each other.
1214 unsigned UnknownTypeNum = 0;
1215 for (unsigned Index = 0; Index < FrameTy->getNumElements(); Index++) {
1216 if (!OffsetCache.contains(Index))
1217 continue;
1218
1219 std::string Name;
1220 uint64_t SizeInBits;
1221 uint32_t AlignInBits;
1222 uint64_t OffsetInBits;
1223 DIType *DITy = nullptr;
1224
1225 Type *Ty = FrameTy->getElementType(Index);
1226 assert(Ty->isSized() && "We can't handle type which is not sized.\n");
1227 SizeInBits = Layout.getTypeSizeInBits(Ty).getFixedValue();
1228 AlignInBits = OffsetCache[Index].first * 8;
1229 OffsetInBits = OffsetCache[Index].second * 8;
1230
1231 if (NameCache.contains(Index)) {
1232 Name = NameCache[Index].str();
1233 DITy = TyCache[Index];
1234 } else {
1235 DITy = solveDIType(DBuilder, Ty, Layout, FrameDITy, LineNum, DITypeCache);
1236 assert(DITy && "SolveDIType shouldn't return nullptr.\n");
1237 Name = DITy->getName().str();
1238 Name += "_" + std::to_string(UnknownTypeNum);
1239 UnknownTypeNum++;
1240 }
1241
1242 Elements.push_back(DBuilder.createMemberType(
1243 FrameDITy, Name, DFile, LineNum, SizeInBits, AlignInBits, OffsetInBits,
1244 llvm::DINode::FlagArtificial, DITy));
1245 }
1246
1247 DBuilder.replaceArrays(FrameDITy, DBuilder.getOrCreateArray(Elements));
1248
1249 auto *FrameDIVar = DBuilder.createAutoVariable(PromiseDIScope, "__coro_frame",
1250 DFile, LineNum, FrameDITy,
1251 true, DINode::FlagArtificial);
1252 assert(FrameDIVar->isValidLocationForIntrinsic(PromiseDDI->getDebugLoc()));
1253
1254 // The Subprogram has a RetainedNodes field which records the debug
1255 // variables it contains. So we need to add __coro_frame to its
1256 // RetainedNodes.
1257 //
1258 // If we don't add __coro_frame to the RetainedNodes, the user may get
1259 // `no symbol __coro_frame in context` rather than the more precise
1260 // `__coro_frame is optimized out`.
1261 if (auto *SubProgram = dyn_cast<DISubprogram>(PromiseDIScope)) {
1262 auto RetainedNodes = SubProgram->getRetainedNodes();
1263 SmallVector<Metadata *, 32> RetainedNodesVec(RetainedNodes.begin(),
1264 RetainedNodes.end());
1265 RetainedNodesVec.push_back(FrameDIVar);
1266 SubProgram->replaceOperandWith(
1267 7, (MDTuple::get(F.getContext(), RetainedNodesVec)));
1268 }
1269
1270 DBuilder.insertDeclare(Shape.FramePtr, FrameDIVar,
1271 DBuilder.createExpression(), DILoc,
1272 Shape.getInsertPtAfterFramePtr());
1273}
1274
1275// Build a struct that will keep state for an active coroutine.
1276// struct f.frame {
1277// ResumeFnTy ResumeFnAddr;
1278// ResumeFnTy DestroyFnAddr;
1279// int ResumeIndex;
1280// ... promise (if present) ...
1281// ... spills ...
1282// };
1283static StructType *buildFrameType(Function &F, coro::Shape &Shape,
1284 FrameDataInfo &FrameData) {
1285 LLVMContext &C = F.getContext();
1286 const DataLayout &DL = F.getParent()->getDataLayout();
1287 StructType *FrameTy = [&] {
1288 SmallString<32> Name(F.getName());
1289 Name.append(".Frame");
1290 return StructType::create(C, Name);
1291 }();
1292
1293 // We will use this value to cap the alignment of spilled values.
1294 std::optional<Align> MaxFrameAlignment;
1295 if (Shape.ABI == coro::ABI::Async)
1296 MaxFrameAlignment = Shape.AsyncLowering.getContextAlignment();
1297 FrameTypeBuilder B(C, DL, MaxFrameAlignment);
1298
1299 AllocaInst *PromiseAlloca = Shape.getPromiseAlloca();
1300 std::optional<FieldIDType> SwitchIndexFieldId;
1301
1302 if (Shape.ABI == coro::ABI::Switch) {
1303 auto *FramePtrTy = FrameTy->getPointerTo();
1304 auto *FnTy = FunctionType::get(Type::getVoidTy(C), FramePtrTy,
1305 /*IsVarArg=*/false);
1306 auto *FnPtrTy = FnTy->getPointerTo();
1307
1308 // Add header fields for the resume and destroy functions.
1309 // We can rely on these being perfectly packed.
1310 (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
1311 (void)B.addField(FnPtrTy, std::nullopt, /*header*/ true);
1312
1313 // PromiseAlloca field needs to be explicitly added here because it's
1314 // a header field with a fixed offset based on its alignment. Hence it
1315 // needs special handling and cannot be added to FrameData.Allocas.
1316 if (PromiseAlloca)
1317 FrameData.setFieldIndex(
1318 PromiseAlloca, B.addFieldForAlloca(PromiseAlloca, /*header*/ true));
1319
1320 // Add a field to store the suspend index. This doesn't need to
1321 // be in the header.
1322 unsigned IndexBits = std::max(1U, Log2_64_Ceil(Shape.CoroSuspends.size()));
1323 Type *IndexType = Type::getIntNTy(C, IndexBits);
1324
1325 SwitchIndexFieldId = B.addField(IndexType, std::nullopt);
1326 } else {
1327 assert(PromiseAlloca == nullptr && "lowering doesn't support promises");
1328 }
1329
1330 // Because multiple allocas may own the same field slot,
1331 // we add allocas to field here.
1332 B.addFieldForAllocas(F, FrameData, Shape);
1333 // Add PromiseAlloca to Allocas list so that
1334 // 1. updateLayoutIndex could update its index after
1335 // `performOptimizedStructLayout`
1336 // 2. it is processed in insertSpills.
1337 if (Shape.ABI == coro::ABI::Switch && PromiseAlloca)
1338 // We assume that the promise alloca won't be modified before
1339 // CoroBegin and that no alias will be created before CoroBegin.
1340 FrameData.Allocas.emplace_back(
1341 PromiseAlloca, DenseMap<Instruction *, std::optional<APInt>>{}, false);
1342 // Create an entry for every spilled value.
1343 for (auto &S : FrameData.Spills) {
1344 Type *FieldType = S.first->getType();
1345 // For byval arguments, we need to store the pointed value in the frame,
1346 // instead of the pointer itself.
1347 if (const Argument *A = dyn_cast<Argument>(S.first))
1348 if (A->hasByValAttr())
1349 FieldType = A->getParamByValType();
1350 FieldIDType Id = B.addField(FieldType, std::nullopt, false /*header*/,
1351 true /*IsSpillOfValue*/);
1352 FrameData.setFieldIndex(S.first, Id);
1353 }
1354
1355 B.finish(FrameTy);
1356 FrameData.updateLayoutIndex(B);
1357 Shape.FrameAlign = B.getStructAlign();
1358 Shape.FrameSize = B.getStructSize();
1359
1360 switch (Shape.ABI) {
1361 case coro::ABI::Switch: {
1362 // In the switch ABI, remember the switch-index field.
1363 auto IndexField = B.getLayoutField(*SwitchIndexFieldId);
1364 Shape.SwitchLowering.IndexField = IndexField.LayoutFieldIndex;
1365 Shape.SwitchLowering.IndexAlign = IndexField.Alignment.value();
1366 Shape.SwitchLowering.IndexOffset = IndexField.Offset;
1367
1368 // Also round the frame size up to a multiple of its alignment, as is
1369 // generally expected in C/C++.
1370 Shape.FrameSize = alignTo(Shape.FrameSize, Shape.FrameAlign);
1371 break;
1372 }
1373
1374 // In the retcon ABI, remember whether the frame is inline in the storage.
1375 case coro::ABI::Retcon:
1376 case coro::ABI::RetconOnce: {
1377 auto Id = Shape.getRetconCoroId();
1378 Shape.RetconLowering.IsFrameInlineInStorage
1379 = (B.getStructSize() <= Id->getStorageSize() &&
1380 B.getStructAlign() <= Id->getStorageAlignment());
1381 break;
1382 }
1383 case coro::ABI::Async: {
1384 Shape.AsyncLowering.FrameOffset =
1385 alignTo(Shape.AsyncLowering.ContextHeaderSize, Shape.FrameAlign);
1386 // Also make the final context size a multiple of the context alignment to
1387 // make allocation easier for allocators.
1388 Shape.AsyncLowering.ContextSize =
1389 alignTo(Shape.AsyncLowering.FrameOffset + Shape.FrameSize,
1390 Shape.AsyncLowering.getContextAlignment());
1391 if (Shape.AsyncLowering.getContextAlignment() < Shape.FrameAlign) {
1392 report_fatal_error(
1393 "The alignment requirement of frame variables cannot be higher than "
1394 "the alignment of the async function context");
1395 }
1396 break;
1397 }
1398 }
1399
1400 return FrameTy;
1401}
1402
1403// We use a pointer use visitor to track how an alloca is being used.
1404// The goal is to be able to answer the following three questions:
1405// 1. Should this alloca be allocated on the frame instead?
1406// 2. Could the content of the alloca be modified prior to CoroBegin, which
1407// would require copying the data from the alloca to the frame after CoroBegin?
1408// 3. Is there any alias created for this alloca prior to CoroBegin, but used
1409// after CoroBegin? In that case, we will need to recreate the alias after
1410// CoroBegin based off the frame.
1411// To answer question 1, we track two things:
1412// a. The list of all BasicBlocks that use this alloca or any of its aliases.
1413// In the end, we check if there exist any two basic blocks that cross
1414// suspension points. If so, this alloca must be put on the frame.
1415// b. Whether the alloca or any alias of the alloca is escaped at some point,
1416// either by storing the address somewhere, or by the address being used in a
1417// function call that might capture. If it's ever escaped, this alloca must be
1418// put on the frame conservatively.
1419// To answer question 2, we track the variable MayWriteBeforeCoroBegin:
1420// whenever a potential write happens, either through a store instruction, a
1421// function call or any of the memory intrinsics, we check whether this
1422// instruction is prior to CoroBegin.
1423// To answer question 3, we track the offsets of all aliases created for the
1424// alloca prior to CoroBegin but used after CoroBegin. std::optional is used
1425// to represent the case when the offset is unknown (e.g. when you have a
1426// PHINode that takes in different offset values). We cannot handle unknown
1427// offsets and will assert. This is the issue left open; an ideal solution would likely require a significant redesign.
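//
// Illustrative example (not part of the original source): if the frontend
// emits
//   %a     = alloca i64
//   %alias = getelementptr i8, ptr %a, i64 4   ; created before coro.begin
//   ...
//   %hdl   = call ptr @llvm.coro.begin(...)
//   store i8 0, ptr %alias                     ; used after coro.begin
// then %alias is recorded by the visitor with offset 4, so an equivalent
// alias off the frame field can be recreated after coro.begin.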
1428namespace {
1429struct AllocaUseVisitor : PtrUseVisitor<AllocaUseVisitor> {
1430 using Base = PtrUseVisitor<AllocaUseVisitor>;
1431 AllocaUseVisitor(const DataLayout &DL, const DominatorTree &DT,
1432 const CoroBeginInst &CB, const SuspendCrossingInfo &Checker,
1433 bool ShouldUseLifetimeStartInfo)
1434 : PtrUseVisitor(DL), DT(DT), CoroBegin(CB), Checker(Checker),
1435 ShouldUseLifetimeStartInfo(ShouldUseLifetimeStartInfo) {}
1436
1437 void visit(Instruction &I) {
1438 Users.insert(&I);
1439 Base::visit(I);
1440 // If the pointer is escaped prior to CoroBegin, we have to assume it would
1441 // be written into before CoroBegin as well.
1442 if (PI.isEscaped() && !DT.dominates(&CoroBegin, PI.getEscapingInst())) {
1443 MayWriteBeforeCoroBegin = true;
1444 }
1445 }
1446 // We need to provide this overload as PtrUseVisitor uses a pointer based
1447 // visiting function.
1448 void visit(Instruction *I) { return visit(*I); }
1449
1450 void visitPHINode(PHINode &I) {
1451 enqueueUsers(I);
1452 handleAlias(I);
1453 }
1454
1455 void visitSelectInst(SelectInst &I) {
1456 enqueueUsers(I);
1457 handleAlias(I);
1458 }
1459
1460 void visitStoreInst(StoreInst &SI) {
1461 // Regardless of whether the alias of the alloca is the value operand or the
1462 // pointer operand, we need to assume the alloca has been written to.
1463 handleMayWrite(SI);
1464
1465 if (SI.getValueOperand() != U->get())
1466 return;
1467
1468 // We are storing the pointer into a memory location, potentially escaping.
1469 // As an optimization, we try to detect simple cases where it doesn't
1470 // actually escape, for example:
1471 // %ptr = alloca ..
1472 // %addr = alloca ..
1473 // store %ptr, %addr
1474 // %x = load %addr
1475 // ..
1476 // If %addr is only used by loading from it, we could simply treat %x as
1477 // another alias of %ptr, and not consider %ptr to have escaped.
1478 auto IsSimpleStoreThenLoad = [&]() {
1479 auto *AI = dyn_cast<AllocaInst>(SI.getPointerOperand());
1480 // If the memory location we are storing to is not an alloca, it
1481 // could be an alias of some other memory locations, which is difficult
1482 // to analyze.
1483 if (!AI)
1484 return false;
1485 // StoreAliases contains aliases of the memory location stored into.
1486 SmallVector<Instruction *, 4> StoreAliases = {AI};
1487 while (!StoreAliases.empty()) {
1488 Instruction *I = StoreAliases.pop_back_val();
1489 for (User *U : I->users()) {
1490 // If we are loading from the memory location, we are creating an
1491 // alias of the original pointer.
1492 if (auto *LI = dyn_cast<LoadInst>(U)) {
1493 enqueueUsers(*LI);
1494 handleAlias(*LI);
1495 continue;
1496 }
1497 // If we are overriding the memory location, the pointer certainly
1498 // won't escape.
1499 if (auto *S = dyn_cast<StoreInst>(U))
1500 if (S->getPointerOperand() == I)
1501 continue;
1502 if (auto *II = dyn_cast<IntrinsicInst>(U))
1503 if (II->isLifetimeStartOrEnd())
1504 continue;
1505 // BitCastInst creates aliases of the memory location being stored
1506 // into.
1507 if (auto *BI = dyn_cast<BitCastInst>(U)) {
1508 StoreAliases.push_back(BI);
1509 continue;
1510 }
1511 return false;
1512 }
1513 }
1514
1515 return true;
1516 };
1517
1518 if (!IsSimpleStoreThenLoad())
1519 PI.setEscaped(&SI);
1520 }
1521
1522 // All mem intrinsics modify the data.
1523 void visitMemIntrinsic(MemIntrinsic &MI) { handleMayWrite(MI); }
1524
1525 void visitBitCastInst(BitCastInst &BC) {
1526 Base::visitBitCastInst(BC);
1527 handleAlias(BC);
1528 }
1529
1530 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1531 Base::visitAddrSpaceCastInst(ASC);
1532 handleAlias(ASC);
1533 }
1534
1535 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1536 // The base visitor will adjust Offset accordingly.
1537 Base::visitGetElementPtrInst(GEPI);
1538 handleAlias(GEPI);
1539 }
1540
1541 void visitIntrinsicInst(IntrinsicInst &II) {
1542 // When we find that the lifetime markers refer to a
1543 // subrange of the original alloca, ignore the lifetime
1544 // markers to avoid misleading the analysis.
1545 if (II.getIntrinsicID() != Intrinsic::lifetime_start || !IsOffsetKnown ||
1546 !Offset.isZero())
1547 return Base::visitIntrinsicInst(II);
1548 LifetimeStarts.insert(&II);
1549 }
1550
1551 void visitCallBase(CallBase &CB) {
1552 for (unsigned Op = 0, OpCount = CB.arg_size(); Op < OpCount; ++Op)
1553 if (U->get() == CB.getArgOperand(Op) && !CB.doesNotCapture(Op))
1554 PI.setEscaped(&CB);
1555 handleMayWrite(CB);
1556 }
1557
1558 bool getShouldLiveOnFrame() const {
1559 if (!ShouldLiveOnFrame)
1560 ShouldLiveOnFrame = computeShouldLiveOnFrame();
1561 return *ShouldLiveOnFrame;
1562 }
1563
1564 bool getMayWriteBeforeCoroBegin() const { return MayWriteBeforeCoroBegin; }
1565
1566 DenseMap<Instruction *, std::optional<APInt>> getAliasesCopy() const {
1567 assert(getShouldLiveOnFrame() && "This method should only be called if the "
1568 "alloca needs to live on the frame.");
1569 for (const auto &P : AliasOffetMap)
1570 if (!P.second)
1571 report_fatal_error("Unable to handle an alias with unknown offset "
1572 "created before CoroBegin.");
1573 return AliasOffetMap;
1574 }
1575
1576private:
1577 const DominatorTree &DT;
1578 const CoroBeginInst &CoroBegin;
1579 const SuspendCrossingInfo &Checker;
1580 // All alias to the original AllocaInst, created before CoroBegin and used
1581 // after CoroBegin. Each entry contains the instruction and the offset in the
1582 // original Alloca. They need to be recreated after CoroBegin off the frame.
1583 DenseMap<Instruction *, std::optional<APInt>> AliasOffetMap{};
1584 SmallPtrSet<Instruction *, 4> Users{};
1585 SmallPtrSet<IntrinsicInst *, 2> LifetimeStarts{};
1586 bool MayWriteBeforeCoroBegin{false};
1587 bool ShouldUseLifetimeStartInfo{true};
1588
1589 mutable std::optional<bool> ShouldLiveOnFrame{};
1590
1591 bool computeShouldLiveOnFrame() const {
1592 // If lifetime information is available, we check it first since it's
1593 // more precise. We look at every pair of lifetime.start intrinsic and
1594 // every basic block that uses the pointer to see if they cross suspension
1595 // points. The uses cover both direct uses as well as indirect uses.
1596 if (ShouldUseLifetimeStartInfo && !LifetimeStarts.empty()) {
1597 for (auto *I : Users)
1598 for (auto *S : LifetimeStarts)
1599 if (Checker.isDefinitionAcrossSuspend(*S, I))
1600 return true;
1601 // Addresses are guaranteed to be identical after every lifetime.start so
1602 // we cannot use the local stack if the address escaped and there is a
1603 // suspend point between lifetime markers. This should also cover the
1604 // case of a single lifetime.start intrinsic in a loop with suspend point.
1605 if (PI.isEscaped()) {
1606 for (auto *A : LifetimeStarts) {
1607 for (auto *B : LifetimeStarts) {
1608 if (Checker.hasPathOrLoopCrossingSuspendPoint(A->getParent(),
1609 B->getParent()))
1610 return true;
1611 }
1612 }
1613 }
1614 return false;
1615 }
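// Editorial note (illustrative, not part of the original source): the escape
// check above also covers a single lifetime.start inside a loop, e.g.
//
//   loop:
//     call void @llvm.lifetime.start.p0(i64 8, ptr %x)
//     ...
//     %s = call i8 @llvm.coro.suspend(token %save, i1 false)
//     br label %loop
//
// Once the address of %x has escaped, the same address must remain valid
// across the suspend, so %x has to be placed on the coroutine frame.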
1616 // FIXME: Ideally the isEscaped check should come at the beginning.
1617 // However there are a few loose ends that need to be fixed first before
1618 // we can do that. We need to make sure we are not over-conservative, so
1619 // that the data accessed in-between await_suspend and symmetric transfer
1620 // is always put on the stack, and also data accessed after coro.end is
1621 // always put on the stack (esp the return object). To fix that, we need
1622 // to:
1623 // 1) Potentially treat sret as nocapture in calls
1624 // 2) Special handle the return object and put it on the stack
1625 // 3) Utilize lifetime.end intrinsic
1626 if (PI.isEscaped())
1627 return true;
1628
1629 for (auto *U1 : Users)
1630 for (auto *U2 : Users)
1631 if (Checker.isDefinitionAcrossSuspend(*U1, U2))
1632 return true;
1633
1634 return false;
1635 }
1636
1637 void handleMayWrite(const Instruction &I) {
1638 if (!DT.dominates(&CoroBegin, &I))
1639 MayWriteBeforeCoroBegin = true;
1640 }
1641
1642 bool usedAfterCoroBegin(Instruction &I) {
1643 for (auto &U : I.uses())
1644 if (DT.dominates(&CoroBegin, U))
1645 return true;
1646 return false;
1647 }
1648
1649 void handleAlias(Instruction &I) {
1650 // We track all aliases created prior to CoroBegin but used after.
1651 // These aliases may need to be recreated after CoroBegin if the alloca
1652 // needs to live on the frame.
1653 if (DT.dominates(&CoroBegin, &I) || !usedAfterCoroBegin(I))
1654 return;
1655
1656 if (!IsOffsetKnown) {
1657 AliasOffetMap[&I].reset();
1658 } else {
1659 auto Itr = AliasOffetMap.find(&I);
1660 if (Itr == AliasOffetMap.end()) {
1661 AliasOffetMap[&I] = Offset;
1662 } else if (Itr->second && *Itr->second != Offset) {
1663 // If we have seen two different possible values for this alias, we set
1664 // it to empty.
1665 AliasOffetMap[&I].reset();
1666 }
1667 }
1668 }
1669};
1670} // namespace
1671
1672// We need to make room to insert a spill after the initial PHIs, but before
1673// the catchswitch instruction. Placing the spill before the catchswitch would
1674// violate the requirement that a catchswitch, like all other EH pads, must be
1675// the first non-PHI instruction in a block.
1676// Split the catchswitch away into a separate block and insert in its place:
1677//
1678// cleanuppad <InsertPt> cleanupret.
1679//
1680// The cleanupret instruction will act as an insert point for the spill.
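// Editorial sketch (illustrative; block names invented, not from the original
// source):
//
//   ehblock:                        ; before
//     %v  = phi ...
//     %cs = catchswitch within ...
//
// becomes
//
//   ehblock:                        ; after
//     %v   = phi ...
//     %pad = cleanuppad within none []
//     ; spill stores are inserted here, right before the cleanupret
//     cleanupret from %pad unwind label %ehblock.split
//   ehblock.split:
//     %cs = catchswitch within ...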
1681static Instruction *splitBeforeCatchSwitch(CatchSwitchInst *CatchSwitch) {
1682 BasicBlock *CurrentBlock = CatchSwitch->getParent();
1683 BasicBlock *NewBlock = CurrentBlock->splitBasicBlock(CatchSwitch);
1684 CurrentBlock->getTerminator()->eraseFromParent();
1685
1686 auto *CleanupPad =
1687 CleanupPadInst::Create(CatchSwitch->getParentPad(), {}, "", CurrentBlock);
1688 auto *CleanupRet =
1689 CleanupReturnInst::Create(CleanupPad, NewBlock, CurrentBlock);
1690 return CleanupRet;
1691}
1692
1693static void createFramePtr(coro::Shape &Shape) {
1694 auto *CB = Shape.CoroBegin;
1695 IRBuilder<> Builder(CB->getNextNode());
1696 StructType *FrameTy = Shape.FrameTy;
1697 PointerType *FramePtrTy = FrameTy->getPointerTo();
1698 Shape.FramePtr =
1699 cast<Instruction>(Builder.CreateBitCast(CB, FramePtrTy, "FramePtr"));
1700}
1701
1702// Replace all alloca and SSA values that are accessed across suspend points
1703// with GetElementPointer from coroutine frame + loads and stores. Create an
1704// AllocaSpillBB that will become the new entry block for the resume parts of
1705// the coroutine:
1706//
1707// %hdl = coro.begin(...)
1708// whatever
1709//
1710// becomes:
1711//
1712// %hdl = coro.begin(...)
1713// %FramePtr = bitcast i8* hdl to %f.frame*
1714// br label %AllocaSpillBB
1715//
1716// AllocaSpillBB:
1717// ; geps corresponding to allocas that were moved to coroutine frame
1718// br label PostSpill
1719//
1720// PostSpill:
1721// whatever
1722//
1723//
1724static void insertSpills(const FrameDataInfo &FrameData, coro::Shape &Shape) {
1725 auto *CB = Shape.CoroBegin;
1726 LLVMContext &C = CB->getContext();
1727 Function *F = CB->getFunction();
1728 IRBuilder<> Builder(C);
1729 StructType *FrameTy = Shape.FrameTy;
1730 Value *FramePtr = Shape.FramePtr;
1731 DominatorTree DT(*F);
1732 SmallDenseMap<Argument *, AllocaInst *, 4> ArgToAllocaMap;
1733
1734 // Create a GEP with the given index into the coroutine frame for the original
1735 // value Orig. Appends an extra 0 index for array-allocas, preserving the
1736 // original type.
1737 auto GetFramePointer = [&](Value *Orig) -> Value * {
1738 FieldIDType Index = FrameData.getFieldIndex(Orig);
1739 SmallVector<Value *, 3> Indices = {
1740 ConstantInt::get(Type::getInt32Ty(C), 0),
1741 ConstantInt::get(Type::getInt32Ty(C), Index),
1742 };
1743
1744 if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1745 if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize())) {
1746 auto Count = CI->getValue().getZExtValue();
1747 if (Count > 1) {
1748 Indices.push_back(ConstantInt::get(Type::getInt32Ty(C), 0));
1749 }
1750 } else {
1751 report_fatal_error("Coroutines cannot handle non static allocas yet");
1752 }
1753 }
1754
1755 auto GEP = cast<GetElementPtrInst>(
1756 Builder.CreateInBoundsGEP(FrameTy, FramePtr, Indices));
1757 if (auto *AI = dyn_cast<AllocaInst>(Orig)) {
1758 if (FrameData.getDynamicAlign(Orig) != 0) {
1759 assert(FrameData.getDynamicAlign(Orig) == AI->getAlign().value());
1760 auto *M = AI->getModule();
1761 auto *IntPtrTy = M->getDataLayout().getIntPtrType(AI->getType());
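// Editorial note: the next few lines round the field address up to the
// alloca's alignment with the usual mask trick,
//   Aligned = (Ptr + Align - 1) & ~(Align - 1)
// e.g. with Align = 16 an address of 0x1007 becomes 0x1010 (illustrative
// values, not from the original source).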
1762 auto *PtrValue = Builder.CreatePtrToInt(GEP, IntPtrTy);
1763 auto *AlignMask =
1764 ConstantInt::get(IntPtrTy, AI->getAlign().value() - 1);
1765 PtrValue = Builder.CreateAdd(PtrValue, AlignMask);
1766 PtrValue = Builder.CreateAnd(PtrValue, Builder.CreateNot(AlignMask));
1767 return Builder.CreateIntToPtr(PtrValue, AI->getType());
1768 }
1769 // If the type of the GEP is not equal to the type of the AllocaInst, it
1770 // implies that the AllocaInst may be reusing the frame slot of another
1771 // AllocaInst. So we cast the GEP to the AllocaInst's type here to reuse
1772 // the frame storage.
1773 //
1774 // Note: If we change the strategy for dealing with alignment, we need to
1775 // refine this casting.
1776 if (GEP->getType() != Orig->getType())
1777 return Builder.CreateBitCast(GEP, Orig->getType(),
1778 Orig->getName() + Twine(".cast"));
1779 }
1780 return GEP;
1781 };
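// Editorial note (illustrative, not part of the original source): for a value
// spilled into frame field <Index> this lambda typically yields something like
//   %x.addr = getelementptr inbounds %f.Frame, ptr %FramePtr, i32 0, i32 <Index>
// with an extra trailing 0 index for array allocas, and the explicit align-up
// sequence above when the alloca's alignment has to be re-established at run
// time. The field name and frame type name here are invented for illustration.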
1782
1783 for (auto const &E : FrameData.Spills) {
1784 Value *Def = E.first;
1785 auto SpillAlignment = Align(FrameData.getAlign(Def));
1786 // Create a store instruction storing the value into the
1787 // coroutine frame.
1788 Instruction *InsertPt = nullptr;
1789 Type *ByValTy = nullptr;
1790 if (auto *Arg = dyn_cast<Argument>(Def)) {
1791 // For arguments, we will place the store instruction right after
1792 // the coroutine frame pointer instruction, i.e. bitcast of
1793 // coro.begin from i8* to %f.frame*.
1794 InsertPt = Shape.getInsertPtAfterFramePtr();
1795
1796 // If we're spilling an Argument, make sure we clear 'nocapture'
1797 // from the coroutine function.
1798 Arg->getParent()->removeParamAttr(Arg->getArgNo(), Attribute::NoCapture);
1799
1800 if (Arg->hasByValAttr())
1801 ByValTy = Arg->getParamByValType();
1802 } else if (auto *CSI = dyn_cast<AnyCoroSuspendInst>(Def)) {
1803 // Don't spill immediately after a suspend; splitting assumes
1804 // that the suspend will be followed by a branch.
1805 InsertPt = CSI->getParent()->getSingleSuccessor()->getFirstNonPHI();
1806 } else {
1807 auto *I = cast<Instruction>(Def);
1808 if (!DT.dominates(CB, I)) {
1809 // If it is not dominated by CoroBegin, then spill should be
1810 // inserted immediately after CoroFrame is computed.
1811 InsertPt = Shape.getInsertPtAfterFramePtr();
1812 } else if (auto *II = dyn_cast<InvokeInst>(I)) {
1813 // If we are spilling the result of the invoke instruction, split
1814 // the normal edge and insert the spill in the new block.
1815 auto *NewBB = SplitEdge(II->getParent(), II->getNormalDest());
1816 InsertPt = NewBB->getTerminator();
1817 } else if (isa<PHINode>(I)) {
1818 // Skip the PHINodes and EH pads instructions.
1819 BasicBlock *DefBlock = I->getParent();
1820 if (auto *CSI = dyn_cast<CatchSwitchInst>(DefBlock->getTerminator()))
1821 InsertPt = splitBeforeCatchSwitch(CSI);
1822 else
1823 InsertPt = &*DefBlock->getFirstInsertionPt();
1824 } else {
1825 assert(!I->isTerminator() && "unexpected terminator");
1826 // For all other values, the spill is placed immediately after
1827 // the definition.
1828 InsertPt = I->getNextNode();
1829 }
1830 }
1831
1832 auto Index = FrameData.getFieldIndex(Def);
1833 Builder.SetInsertPoint(InsertPt);
1834 auto *G = Builder.CreateConstInBoundsGEP2_32(
1835 FrameTy, FramePtr, 0, Index, Def->getName() + Twine(".spill.addr"));
1836 if (ByValTy) {
1837 // For byval arguments, we need to store the pointed value in the frame,
1838 // instead of the pointer itself.
1839 auto *Value = Builder.CreateLoad(ByValTy, Def);
1840 Builder.CreateAlignedStore(Value, G, SpillAlignment);
1841 } else {
1842 Builder.CreateAlignedStore(Def, G, SpillAlignment);
1843 }
1844
1845 BasicBlock *CurrentBlock = nullptr;
1846 Value *CurrentReload = nullptr;
1847 for (auto *U : E.second) {
1848 // If we have not seen the use block yet, create a load instruction at the
1849 // top of the block to reload the spilled value from the coroutine frame;
1850 // the reload is then reused for later uses in the same block.
1851 if (CurrentBlock != U->getParent()) {
1852 CurrentBlock = U->getParent();
1853 Builder.SetInsertPoint(&*CurrentBlock->getFirstInsertionPt());
1854
1855 auto *GEP = GetFramePointer(E.first);
1856 GEP->setName(E.first->getName() + Twine(".reload.addr"));
1857 if (ByValTy)
1858 CurrentReload = GEP;
1859 else
1860 CurrentReload = Builder.CreateAlignedLoad(
1861 FrameTy->getElementType(FrameData.getFieldIndex(E.first)), GEP,
1862 SpillAlignment, E.first->getName() + Twine(".reload"));
1863
1864 TinyPtrVector<DbgDeclareInst *> DIs = FindDbgDeclareUses(Def);
1865 // Try best to find dbg.declare. If the spill is a temp, there may not
1866 // be a direct dbg.declare. Walk up the load chain to find one from an
1867 // alias.
1868 if (F->getSubprogram()) {
1869 auto *CurDef = Def;
1870 while (DIs.empty() && isa<LoadInst>(CurDef)) {
1871 auto *LdInst = cast<LoadInst>(CurDef);
1872 // Only consider ptr to ptr same type load.
1873 if (LdInst->getPointerOperandType() != LdInst->getType())
1874 break;
1875 CurDef = LdInst->getPointerOperand();
1876 DIs = FindDbgDeclareUses(CurDef);
1877 }
1878 }
1879
1880 for (DbgDeclareInst *DDI : DIs) {
1881 bool AllowUnresolved = false;
1882 // This dbg.declare is preserved for all coro-split function
1883 // fragments. It will be unreachable in the main function, and
1884 // processed by coro::salvageDebugInfo() by CoroCloner.
1885 DIBuilder(*CurrentBlock->getParent()->getParent(), AllowUnresolved)
1886 .insertDeclare(CurrentReload, DDI->getVariable(),
1887 DDI->getExpression(), DDI->getDebugLoc(),
1888 &*Builder.GetInsertPoint());
1889 // This dbg.declare is for the main function entry point. It
1890 // will be deleted in all coro-split functions.
1891 coro::salvageDebugInfo(ArgToAllocaMap, DDI, Shape.OptimizeFrame);
1892 }
1893 }
1894
1895 // If we have a single edge PHINode, remove it and replace it with a
1896 // reload from the coroutine frame. (We already took care of multi edge
1897 // PHINodes by rewriting them in the rewritePHIs function).
1898 if (auto *PN = dyn_cast<PHINode>(U)) {
1899 assert(PN->getNumIncomingValues() == 1 &&
1900 "unexpected number of incoming "
1901 "values in the PHINode");
1902 PN->replaceAllUsesWith(CurrentReload);
1903 PN->eraseFromParent();
1904 continue;
1905 }
1906
1907 // Replace all uses of CurrentValue in the current instruction with
1908 // reload.
1909 U->replaceUsesOfWith(Def, CurrentReload);
1910 }
1911 }
1912
1913 BasicBlock *FramePtrBB = Shape.getInsertPtAfterFramePtr()->getParent();
1914
1915 auto SpillBlock = FramePtrBB->splitBasicBlock(
1916 Shape.getInsertPtAfterFramePtr(), "AllocaSpillBB");
1917 SpillBlock->splitBasicBlock(&SpillBlock->front(), "PostSpill");
1918 Shape.AllocaSpillBlock = SpillBlock;
1919
1920 // retcon and retcon.once lowering assumes all uses have been sunk.
1921 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
1922 Shape.ABI == coro::ABI::Async) {
1923 // If we found any allocas, replace all of their remaining uses with GEPs.
1924 Builder.SetInsertPoint(&SpillBlock->front());
1925 for (const auto &P : FrameData.Allocas) {
1926 AllocaInst *Alloca = P.Alloca;
1927 auto *G = GetFramePointer(Alloca);
1928
1929 // We are not using ReplaceInstWithInst(P.first, cast<Instruction>(G))
1930 // here, as we are changing location of the instruction.
1931 G->takeName(Alloca);
1932 Alloca->replaceAllUsesWith(G);
1933 Alloca->eraseFromParent();
1934 }
1935 return;
1936 }
1937
1938 // If we found any allocas, replace all of their remaining uses with GEP
1939 // instructions. To retain debuggability, we replace the uses of allocas in
1940 // dbg.declares and dbg.values with the reload from the frame.
1941 // Note: We cannot replace the alloca with GEP instructions indiscriminately,
1942 // as some of the uses may not be dominated by CoroBegin.
1943 Builder.SetInsertPoint(&Shape.AllocaSpillBlock->front());
1944 SmallVector<Instruction *, 4> UsersToUpdate;
1945 for (const auto &A : FrameData.Allocas) {
1946 AllocaInst *Alloca = A.Alloca;
1947 UsersToUpdate.clear();
1948 for (User *U : Alloca->users()) {
1949 auto *I = cast<Instruction>(U);
1950 if (DT.dominates(CB, I))
1951 UsersToUpdate.push_back(I);
1952 }
1953 if (UsersToUpdate.empty())
1954 continue;
1955 auto *G = GetFramePointer(Alloca);
1956 G->setName(Alloca->getName() + Twine(".reload.addr"));
1957
1958 SmallVector<DbgVariableIntrinsic *, 4> DIs;
1959 findDbgUsers(DIs, Alloca);
1960 for (auto *DVI : DIs)
1961 DVI->replaceUsesOfWith(Alloca, G);
1962
1963 for (Instruction *I : UsersToUpdate) {
1964 // It is meaningless to retain lifetime intrinsics that refer to members of
1965 // the coroutine frame, and such meaningless lifetime intrinsics may
1966 // block further optimizations.
1967 if (I->isLifetimeStartOrEnd()) {
1968 I->eraseFromParent();
1969 continue;
1970 }
1971
1972 I->replaceUsesOfWith(Alloca, G);
1973 }
1974 }
1975 Builder.SetInsertPoint(Shape.getInsertPtAfterFramePtr());
1976 for (const auto &A : FrameData.Allocas) {
1977 AllocaInst *Alloca = A.Alloca;
1978 if (A.MayWriteBeforeCoroBegin) {
1979 // isEscaped really means potentially modified before CoroBegin.
1980 if (Alloca->isArrayAllocation())
1981 report_fatal_error(
1982 "Coroutines cannot handle copying of array allocas yet");
1983
1984 auto *G = GetFramePointer(Alloca);
1985 auto *Value = Builder.CreateLoad(Alloca->getAllocatedType(), Alloca);
1986 Builder.CreateStore(Value, G);
1987 }
1988 // For each alias to Alloca created before CoroBegin but used after
1989 // CoroBegin, we recreate it after CoroBegin by applying the offset
1990 // to the pointer in the frame.
1991 for (const auto &Alias : A.Aliases) {
1992 auto *FramePtr = GetFramePointer(Alloca);
1993 auto *FramePtrRaw =
1994 Builder.CreateBitCast(FramePtr, Type::getInt8PtrTy(C));
1995 auto &Value = *Alias.second;
1996 auto ITy = IntegerType::get(C, Value.getBitWidth());
1997 auto *AliasPtr = Builder.CreateGEP(Type::getInt8Ty(C), FramePtrRaw,
1998 ConstantInt::get(ITy, Value));
1999 auto *AliasPtrTyped =
2000 Builder.CreateBitCast(AliasPtr, Alias.first->getType());
2001 Alias.first->replaceUsesWithIf(
2002 AliasPtrTyped, [&](Use &U) { return DT.dominates(CB, U); });
2003 }
2004 }
2005
2006 // PromiseAlloca is not collected in FrameData.Allocas. So we don't handle
2007 // the case that the PromiseAlloca may have writes before CoroBegin in the
2008 // above codes. And it may be problematic in edge cases. See
2009 // https://github.com/llvm/llvm-project/issues/57861 for an example.
2010 if (Shape.ABI == coro::ABI::Switch && Shape.SwitchLowering.PromiseAlloca) {
2011 AllocaInst *PA = Shape.SwitchLowering.PromiseAlloca;
2012 // Check whether there is a memory access to the promise alloca before CoroBegin.
2013 bool HasAccessingPromiseBeforeCB = llvm::any_of(PA->uses(), [&](Use &U) {
2014 auto *Inst = dyn_cast<Instruction>(U.getUser());
2015 if (!Inst || DT.dominates(CB, Inst))
2016 return false;
2017
2018 if (auto *CI = dyn_cast<CallInst>(Inst)) {
2019 // It is fine if the call wouldn't write to the Promise.
2020 // This is possible for @llvm.coro.id intrinsics, which
2021 // would take the promise as the second argument as a
2022 // marker.
2023 if (CI->onlyReadsMemory() ||
2024 CI->onlyReadsMemory(CI->getArgOperandNo(&U)))
2025 return false;
2026 return true;
2027 }
2028
2029 return isa<StoreInst>(Inst) ||
2030 // It may take too much time to track the uses.
2031 // Be conservative about the case where the use may escape.
2032 isa<GetElementPtrInst>(Inst) ||
2033 // There would always be a bitcast of the promise alloca
2034 // before opaque pointers were enabled. Now that opaque
2035 // pointers are enabled by default, this should be
2036 // fine.
2037 isa<BitCastInst>(Inst);
2038 });
2039 if (HasAccessingPromiseBeforeCB) {
2040 Builder.SetInsertPoint(Shape.getInsertPtAfterFramePtr());
2041 auto *G = GetFramePointer(PA);
2042 auto *Value = Builder.CreateLoad(PA->getAllocatedType(), PA);
2043 Builder.CreateStore(Value, G);
2044 }
2045 }
2046}
2047
2048// Moves the values in the PHIs in SuccBB that correspond to PredBB into a new
2049// PHI in InsertedBB.
2050static void movePHIValuesToInsertedBlock(BasicBlock *SuccBB,
2051 BasicBlock *InsertedBB,
2052 BasicBlock *PredBB,
2053 PHINode *UntilPHI = nullptr) {
2054 auto *PN = cast<PHINode>(&SuccBB->front());
2055 do {
2056 int Index = PN->getBasicBlockIndex(InsertedBB);
2057 Value *V = PN->getIncomingValue(Index);
2058 PHINode *InputV = PHINode::Create(
2059 V->getType(), 1, V->getName() + Twine(".") + SuccBB->getName(),
2060 &InsertedBB->front());
2061 InputV->addIncoming(V, PredBB);
2062 PN->setIncomingValue(Index, InputV);
2063 PN = dyn_cast<PHINode>(PN->getNextNode());
2064 } while (PN != UntilPHI);
2065}
2066
2067// Rewrites the PHI Nodes in a cleanuppad.
2068static void rewritePHIsForCleanupPad(BasicBlock *CleanupPadBB,
2069 CleanupPadInst *CleanupPad) {
2070 // For every incoming edge to a CleanupPad we will create a new block holding
2071 // all incoming values in single-value PHI nodes. We will then create another
2072 // block to act as a dispatcher (as all unwind edges for related EH blocks
2073 // must be the same).
2074 //
2075 // cleanuppad:
2076 // %2 = phi i32[%0, %catchswitch], [%1, %catch.1]
2077 // %3 = cleanuppad within none []
2078 //
2079 // It will create:
2080 //
2081 // cleanuppad.corodispatch
2082 // %2 = phi i8[0, %catchswitch], [1, %catch.1]
2083 // %3 = cleanuppad within none []
2084 // switch i8 %2, label %unreachable
2085 // [i8 0, label %cleanuppad.from.catchswitch
2086 // i8 1, label %cleanuppad.from.catch.1]
2087 // cleanuppad.from.catchswitch:
2088 // %4 = phi i32 [%0, %catchswitch]
2089 // br %label cleanuppad
2090 // cleanuppad.from.catch.1:
2091 // %6 = phi i32 [%1, %catch.1]
2092 // br %label cleanuppad
2093 // cleanuppad:
2094 // %8 = phi i32 [%4, %cleanuppad.from.catchswitch],
2095 // [%6, %cleanuppad.from.catch.1]
2096
2097 // Unreachable BB, in case switching on an invalid value in the dispatcher.
2098 auto *UnreachBB = BasicBlock::Create(
2099 CleanupPadBB->getContext(), "unreachable", CleanupPadBB->getParent());
2100 IRBuilder<> Builder(UnreachBB);
2101 Builder.CreateUnreachable();
2102
2103 // Create a new cleanuppad which will be the dispatcher.
2104 auto *NewCleanupPadBB =
2105 BasicBlock::Create(CleanupPadBB->getContext(),
2106 CleanupPadBB->getName() + Twine(".corodispatch"),
2107 CleanupPadBB->getParent(), CleanupPadBB);
2108 Builder.SetInsertPoint(NewCleanupPadBB);
2109 auto *SwitchType = Builder.getInt8Ty();
2110 auto *SetDispatchValuePN =
2111 Builder.CreatePHI(SwitchType, pred_size(CleanupPadBB));
2112 CleanupPad->removeFromParent();
2113 CleanupPad->insertAfter(SetDispatchValuePN);
2114 auto *SwitchOnDispatch = Builder.CreateSwitch(SetDispatchValuePN, UnreachBB,
2115 pred_size(CleanupPadBB));
2116
2117 int SwitchIndex = 0;
2118 SmallVector<BasicBlock *, 8> Preds(predecessors(CleanupPadBB));
2119 for (BasicBlock *Pred : Preds) {
2120 // Create a new cleanuppad and move the PHI values to there.
2121 auto *CaseBB = BasicBlock::Create(CleanupPadBB->getContext(),
2122 CleanupPadBB->getName() +
2123 Twine(".from.") + Pred->getName(),
2124 CleanupPadBB->getParent(), CleanupPadBB);
2125 updatePhiNodes(CleanupPadBB, Pred, CaseBB);
2126 CaseBB->setName(CleanupPadBB->getName() + Twine(".from.") +
2127 Pred->getName());
2128 Builder.SetInsertPoint(CaseBB);
2129 Builder.CreateBr(CleanupPadBB);
2130 movePHIValuesToInsertedBlock(CleanupPadBB, CaseBB, NewCleanupPadBB);
2131
2132 // Update this Pred to the new unwind point.
2133 setUnwindEdgeTo(Pred->getTerminator(), NewCleanupPadBB);
2134
2135 // Setup the switch in the dispatcher.
2136 auto *SwitchConstant = ConstantInt::get(SwitchType, SwitchIndex);
2137 SetDispatchValuePN->addIncoming(SwitchConstant, Pred);
2138 SwitchOnDispatch->addCase(SwitchConstant, CaseBB);
2139 SwitchIndex++;
2140 }
2141}
2142
2143static void cleanupSinglePredPHIs(Function &F) {
2144 SmallVector<PHINode *, 32> Worklist;
2145 for (auto &BB : F) {
2146 for (auto &Phi : BB.phis()) {
2147 if (Phi.getNumIncomingValues() == 1) {
2148 Worklist.push_back(&Phi);
2149 } else
2150 break;
2151 }
2152 }
2153 while (!Worklist.empty()) {
2154 auto *Phi = Worklist.pop_back_val();
2155 auto *OriginalValue = Phi->getIncomingValue(0);
2156 Phi->replaceAllUsesWith(OriginalValue);
2157 }
2158}
2159
2160static void rewritePHIs(BasicBlock &BB) {
2161 // For every incoming edge we will create a block holding all
2162 // incoming values in single-value PHI nodes.
2163 //
2164 // loop:
2165 // %n.val = phi i32[%n, %entry], [%inc, %loop]
2166 //
2167 // It will create:
2168 //
2169 // loop.from.entry:
2170 // %n.loop.pre = phi i32 [%n, %entry]
2171 // br %label loop
2172 // loop.from.loop:
2173 // %inc.loop.pre = phi i32 [%inc, %loop]
2174 // br %label loop
2175 //
2176 // After this rewrite, further analysis will ignore any phi nodes with more
2177 // than one incoming edge.
2178
2179 // TODO: Simplify PHINodes in the basic block to remove duplicate
2180 // predecessors.
2181
2182 // Special case for CleanupPad: all EH blocks must have the same unwind edge
2183 // so we need to create an additional "dispatcher" block.
2184 if (auto *CleanupPad =
2185 dyn_cast_or_null<CleanupPadInst>(BB.getFirstNonPHI())) {
2186 SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
2187 for (BasicBlock *Pred : Preds) {
2188 if (CatchSwitchInst *CS =
2189 dyn_cast<CatchSwitchInst>(Pred->getTerminator())) {
2190 // CleanupPad with a CatchSwitch predecessor: therefore this is an
2191 // unwind destination that needs to be handled specially.
2192 assert(CS->getUnwindDest() == &BB);
2193 (void)CS;
2194 rewritePHIsForCleanupPad(&BB, CleanupPad);
2195 return;
2196 }
2197 }
2198 }
2199
2200 LandingPadInst *LandingPad = nullptr;
2201 PHINode *ReplPHI = nullptr;
2202 if ((LandingPad = dyn_cast_or_null<LandingPadInst>(BB.getFirstNonPHI()))) {
2203 // ehAwareSplitEdge will clone the LandingPad in all the edge blocks.
2204 // We replace the original landing pad with a PHINode that will collect the
2205 // results from all of them.
2206 ReplPHI = PHINode::Create(LandingPad->getType(), 1, "", LandingPad);
2207 ReplPHI->takeName(LandingPad);
2208 LandingPad->replaceAllUsesWith(ReplPHI);
2209 // We will erase the original landing pad at the end of this function after
2210 // ehAwareSplitEdge cloned it in the transition blocks.
2211 }
2212
2213 SmallVector<BasicBlock *, 8> Preds(predecessors(&BB));
2214 for (BasicBlock *Pred : Preds) {
2215 auto *IncomingBB = ehAwareSplitEdge(Pred, &BB, LandingPad, ReplPHI);
2216 IncomingBB->setName(BB.getName() + Twine(".from.") + Pred->getName());
2217
2218 // Stop the moving of values at ReplPHI, as this is either null or the PHI
2219 // that replaced the landing pad.
2220 movePHIValuesToInsertedBlock(&BB, IncomingBB, Pred, ReplPHI);
2221 }
2222
2223 if (LandingPad) {
2224 // Calls to the ehAwareSplitEdge function cloned the original landing pad.
2225 // No longer need it.
2226 LandingPad->eraseFromParent();
2227 }
2228}
2229
2230static void rewritePHIs(Function &F) {
2231 SmallVector<BasicBlock *, 8> WorkList;
2232
2233 for (BasicBlock &BB : F)
2234 if (auto *PN = dyn_cast<PHINode>(&BB.front()))
2235 if (PN->getNumIncomingValues() > 1)
2236 WorkList.push_back(&BB);
2237
2238 for (BasicBlock *BB : WorkList)
2239 rewritePHIs(*BB);
2240}
2241
2242/// Default materializable callback
2243// Check for instructions that we can recreate on resume as opposed to spill
2244// the result into a coroutine frame.
2245bool coro::defaultMaterializable(Instruction &V) {
2246 return (isa<CastInst>(&V) || isa<GetElementPtrInst>(&V) ||
2247 isa<BinaryOperator>(&V) || isa<CmpInst>(&V) || isa<SelectInst>(&V));
2248}
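// Editorial note (illustrative, not part of the original source): e.g. a
// cheap address computation such as
//   %p = getelementptr inbounds %T, ptr %base, i64 0, i32 1
// whose use sits on the far side of a suspend point can simply be recomputed
// after the resume (as long as %base itself is available there), instead of
// occupying a slot in the coroutine frame.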
2249
2250// Check for structural coroutine intrinsics that should not be spilled into
2251// the coroutine frame.
2252static bool isCoroutineStructureIntrinsic(Instruction &I) {
2253 return isa<CoroIdInst>(&I) || isa<CoroSaveInst>(&I) ||
2254 isa<CoroSuspendInst>(&I);
2255}
2256
2257// For each instruction identified as materializable across the suspend point,
2258// and its associated DAG of other rematerializable instructions,
2259// recreate the DAG of instructions after the suspend point.
2260static void rewriteMaterializableInstructions(
2261 const SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8>
2262 &AllRemats) {
2263 // This has to be done in 2 phases
2264 // Do the remats and record the required defs to be replaced in the
2265 // original use instructions
2266 // Once all the remats are complete, replace the uses in the final
2267 // instructions with the new defs
2268 typedef struct {
2269 Instruction *Use;
2270 Instruction *Def;
2271 Instruction *Remat;
2272 } ProcessNode;
2273
2274 SmallVector<ProcessNode> FinalInstructionsToProcess;
2275
2276 for (const auto &E : AllRemats) {
2277 Instruction *Use = E.first;
2278 Instruction *CurrentMaterialization = nullptr;
2279 RematGraph *RG = E.second.get();
2280 ReversePostOrderTraversal<RematGraph *> RPOT(RG);
2281 SmallVector<Instruction *> InstructionsToProcess;
2282
2283 // If the target use is actually a suspend instruction then we have to
2284 // insert the remats into the end of the predecessor (there should only be
2285 // one). This is so that suspend blocks always have the suspend instruction
2286 // as the first instruction.
2287 auto InsertPoint = &*Use->getParent()->getFirstInsertionPt();
2288 if (isa<AnyCoroSuspendInst>(Use)) {
2289 BasicBlock *SuspendPredecessorBlock =
2290 Use->getParent()->getSinglePredecessor();
2291 assert(SuspendPredecessorBlock && "malformed coro suspend instruction");
2292 InsertPoint = SuspendPredecessorBlock->getTerminator();
2293 }
2294
2295 // Note: skip the first instruction as this is the actual use that we're
2296 // rematerializing everything for.
2297 auto I = RPOT.begin();
2298 ++I;
2299 for (; I != RPOT.end(); ++I) {
2300 Instruction *D = (*I)->Node;
2301 CurrentMaterialization = D->clone();
2302 CurrentMaterialization->setName(D->getName());
2303 CurrentMaterialization->insertBefore(InsertPoint);
2304 InsertPoint = CurrentMaterialization;
2305
2306 // Replace all uses of Def in the instructions being added as part of this
2307 // rematerialization group
2308 for (auto &I : InstructionsToProcess)
2309 I->replaceUsesOfWith(D, CurrentMaterialization);
2310
2311 // Don't replace the final use at this point as this can cause problems
2312 // for other materializations. Instead, for any final use that uses a
2313 // def that's being rematerialized, record the replacement values
2314 for (unsigned i = 0, E = Use->getNumOperands(); i != E; ++i)
2315 if (Use->getOperand(i) == D) // Is this operand pointing to oldval?
2316 FinalInstructionsToProcess.push_back(
2317 {Use, D, CurrentMaterialization});
2318
2319 InstructionsToProcess.push_back(CurrentMaterialization);
2320 }
2321 }
2322
2323 // Finally, replace the uses with the defs that we've just rematerialized
2324 for (auto &R : FinalInstructionsToProcess) {
2325 if (auto *PN = dyn_cast<PHINode>(R.Use)) {
2326 assert(PN->getNumIncomingValues() == 1 && "unexpected number of incoming "
2327 "values in the PHINode");
2328 PN->replaceAllUsesWith(R.Remat);
2329 PN->eraseFromParent();
2330 continue;
2331 }
2332 R.Use->replaceUsesOfWith(R.Def, R.Remat);
2333 }
2334}
2335
2336// Splits the block at a particular instruction unless it is the first
2337// instruction in the block with a single predecessor.
2338static BasicBlock *splitBlockIfNotFirst(Instruction *I, const Twine &Name) {
2339 auto *BB = I->getParent();
2340 if (&BB->front() == I) {
2341 if (BB->getSinglePredecessor()) {
2342 BB->setName(Name);
2343 return BB;
2344 }
2345 }
2346 return BB->splitBasicBlock(I, Name);
2347}
2348
2349// Split above and below a particular instruction so that it
2350// will be all alone by itself in a block.
2351static void splitAround(Instruction *I, const Twine &Name) {
2352 splitBlockIfNotFirst(I, Name);
2353 splitBlockIfNotFirst(I->getNextNode(), "After" + Name);
2354}
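// Editorial note (illustrative, not part of the original source): splitAround
// is used later in this file to give coro.save / coro.suspend / coro.end their
// own blocks, so a block of the form
//   bb: ...; %save = coro.save; %s = coro.suspend; ...
// roughly becomes the chain
//   bb -> CoroSave (only %save) -> CoroSuspend (only %s) -> AfterCoroSuspend
// which is the shape the suspend-crossing analysis expects.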
2355
2356static bool isSuspendBlock(BasicBlock *BB) {
2357 return isa<AnyCoroSuspendInst>(BB->front());
2358}
2359
2360typedef SmallPtrSet<BasicBlock *, 8> VisitedBlocksSet;
2361
2362/// Does control flow starting at the given block ever reach a suspend
2363/// instruction before reaching a block in VisitedOrFreeBBs?
2364static bool isSuspendReachableFrom(BasicBlock *From,
2365 VisitedBlocksSet &VisitedOrFreeBBs) {
2366 // Eagerly try to add this block to the visited set. If it's already
2367 // there, stop recursing; this path doesn't reach a suspend before
2368 // either looping or reaching a freeing block.
2369 if (!VisitedOrFreeBBs.insert(From).second)
2370 return false;
2371
2372 // We assume that we'll already have split suspends into their own blocks.
2373 if (isSuspendBlock(From))
2374 return true;
2375
2376 // Recurse on the successors.
2377 for (auto *Succ : successors(From)) {
2378 if (isSuspendReachableFrom(Succ, VisitedOrFreeBBs))
2379 return true;
2380 }
2381
2382 return false;
2383}
2384
2385/// Is the given alloca "local", i.e. bounded in lifetime to not cross a
2386/// suspend point?
2387static bool isLocalAlloca(CoroAllocaAllocInst *AI) {
2388 // Seed the visited set with all the basic blocks containing a free
2389 // so that we won't pass them up.
2390 VisitedBlocksSet VisitedOrFreeBBs;
2391 for (auto *User : AI->users()) {
2392 if (auto FI = dyn_cast<CoroAllocaFreeInst>(User))
2393 VisitedOrFreeBBs.insert(FI->getParent());
2394 }
2395
2396 return !isSuspendReachableFrom(AI->getParent(), VisitedOrFreeBBs);
2397}
2398
2399/// After we split the coroutine, will the given basic block be along
2400/// an obvious exit path for the resumption function?
2401static bool willLeaveFunctionImmediatelyAfter(BasicBlock *BB,
2402 unsigned depth = 3) {
2403 // If we've bottomed out our depth count, stop searching and assume
2404 // that the path might loop back.
2405 if (depth == 0) return false;
2406
2407 // If this is a suspend block, we're about to exit the resumption function.
2408 if (isSuspendBlock(BB)) return true;
2409
2410 // Recurse into the successors.
2411 for (auto *Succ : successors(BB)) {
2412 if (!willLeaveFunctionImmediatelyAfter(Succ, depth - 1))
2413 return false;
2414 }
2415
2416 // If none of the successors leads back in a loop, we're on an exit/abort.
2417 return true;
2418}
2419
2420static bool localAllocaNeedsStackSave(CoroAllocaAllocInst *AI) {
2421 // Look for a free that isn't sufficiently obviously followed by
2422 // either a suspend or a termination, i.e. something that will leave
2423 // the coro resumption frame.
2424 for (auto *U : AI->users()) {
2425 auto FI = dyn_cast<CoroAllocaFreeInst>(U);
2426 if (!FI) continue;
2427
2428 if (!willLeaveFunctionImmediatelyAfter(FI->getParent()))
2429 return true;
2430 }
2431
2432 // If we never found one, we don't need a stack save.
2433 return false;
2434}
2435
2436/// Turn each of the given local allocas into a normal (dynamic) alloca
2437/// instruction.
2438static void lowerLocalAllocas(ArrayRef<CoroAllocaAllocInst*> LocalAllocas,
2439 SmallVectorImpl<Instruction*> &DeadInsts) {
2440 for (auto *AI : LocalAllocas) {
2441 auto M = AI->getModule();
2442 IRBuilder<> Builder(AI);
2443
2444 // Save the stack depth. Try to avoid doing this if the stackrestore
2445 // is going to immediately precede a return or something.
2446 Value *StackSave = nullptr;
2447 if (localAllocaNeedsStackSave(AI))
2448 StackSave = Builder.CreateCall(
2449 Intrinsic::getDeclaration(M, Intrinsic::stacksave));
2450
2451 // Allocate memory.
2452 auto Alloca = Builder.CreateAlloca(Builder.getInt8Ty(), AI->getSize());
2453 Alloca->setAlignment(AI->getAlignment());
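// Editorial sketch (illustrative, not part of the original source): a local
// coro.alloca.alloc of, say, 64 bytes roughly lowers to
//   %save = call ptr @llvm.stacksave()
//   %mem  = alloca i8, i64 64
//   ...                                      ; coro.alloca.get users now use %mem
//   call void @llvm.stackrestore(ptr %save)  ; emitted at each coro.alloca.free
// with the stacksave/stackrestore pair omitted when every free is obviously
// followed by a suspend or a function exit.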
2454
2455 for (auto *U : AI->users()) {
2456 // Replace gets with the allocation.
2457 if (isa<CoroAllocaGetInst>(U)) {
2458 U->replaceAllUsesWith(Alloca);
2459
2460 // Replace frees with stackrestores. This is safe because
2461 // alloca.alloc is required to obey a stack discipline, although we
2462 // don't enforce that structurally.
2463 } else {
2464 auto FI = cast<CoroAllocaFreeInst>(U);
2465 if (StackSave) {
2466 Builder.SetInsertPoint(FI);
2467 Builder.CreateCall(
2468 Intrinsic::getDeclaration(M, Intrinsic::stackrestore),
2469 StackSave);
2470 }
2471 }
2472 DeadInsts.push_back(cast<Instruction>(U));
2473 }
2474
2475 DeadInsts.push_back(AI);
2476 }
2477}
2478
2479/// Turn the given coro.alloca.alloc call into a dynamic allocation.
2480/// This happens during the all-instructions iteration, so it must not
2481/// delete the call.
2482static Instruction *lowerNonLocalAlloca(CoroAllocaAllocInst *AI,
2483 coro::Shape &Shape,
2484 SmallVectorImpl<Instruction*> &DeadInsts) {
2485 IRBuilder<> Builder(AI);
2486 auto Alloc = Shape.emitAlloc(Builder, AI->getSize(), nullptr);
2487
2488 for (User *U : AI->users()) {
2489 if (isa<CoroAllocaGetInst>(U)) {
2490 U->replaceAllUsesWith(Alloc);
2491 } else {
2492 auto FI = cast<CoroAllocaFreeInst>(U);
2493 Builder.SetInsertPoint(FI);
2494 Shape.emitDealloc(Builder, Alloc, nullptr);
2495 }
2496 DeadInsts.push_back(cast<Instruction>(U));
2497 }
2498
2499 // Push this on last so that it gets deleted after all the others.
2500 DeadInsts.push_back(AI);
2501
2502 // Return the new allocation value so that we can check for needed spills.
2503 return cast<Instruction>(Alloc);
2504}
2505
2506/// Get the current swifterror value.
2507static Value *emitGetSwiftErrorValue(IRBuilder<> &Builder, Type *ValueTy,
2508 coro::Shape &Shape) {
2509 // Make a fake function pointer as a sort of intrinsic.
2510 auto FnTy = FunctionType::get(ValueTy, {}, false);
2511 auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
2512
2513 auto Call = Builder.CreateCall(FnTy, Fn, {});
2514 Shape.SwiftErrorOps.push_back(Call);
2515
2516 return Call;
2517}
2518
2519/// Set the given value as the current swifterror value.
2520///
2521/// Returns a slot that can be used as a swifterror slot.
2522static Value *emitSetSwiftErrorValue(IRBuilder<> &Builder, Value *V,
2523 coro::Shape &Shape) {
2524 // Make a fake function pointer as a sort of intrinsic.
2525 auto FnTy = FunctionType::get(V->getType()->getPointerTo(),
2526 {V->getType()}, false);
2527 auto Fn = ConstantPointerNull::get(FnTy->getPointerTo());
2528
2529 auto Call = Builder.CreateCall(FnTy, Fn, { V });
2530 Shape.SwiftErrorOps.push_back(Call);
2531
2532 return Call;
2533}
2534
2535/// Set the swifterror value from the given alloca before a call,
2536/// then put in back in the alloca afterwards.
2537///
2538/// Returns an address that will stand in for the swifterror slot
2539/// until splitting.
2540static Value *emitSetAndGetSwiftErrorValueAround(Instruction *Call,
2541 AllocaInst *Alloca,
2542 coro::Shape &Shape) {
2543 auto ValueTy = Alloca->getAllocatedType();
2544 IRBuilder<> Builder(Call);
2545
2546 // Load the current value from the alloca and set it as the
2547 // swifterror value.
2548 auto ValueBeforeCall = Builder.CreateLoad(ValueTy, Alloca);
2549 auto Addr = emitSetSwiftErrorValue(Builder, ValueBeforeCall, Shape);
2550
2551 // Move to after the call. Since swifterror only has a guaranteed
2552 // value on normal exits, we can ignore implicit and explicit unwind
2553 // edges.
2554 if (isa<CallInst>(Call)) {
2555 Builder.SetInsertPoint(Call->getNextNode());
2556 } else {
2557 auto Invoke = cast<InvokeInst>(Call);
2558 Builder.SetInsertPoint(Invoke->getNormalDest()->getFirstNonPHIOrDbg());
2559 }
2560
2561 // Get the current swifterror value and store it to the alloca.
2562 auto ValueAfterCall = emitGetSwiftErrorValue(Builder, ValueTy, Shape);
2563 Builder.CreateStore(ValueAfterCall, Alloca);
2564
2565 return Addr;
2566}
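// Editorial sketch (illustrative, not part of the original source): around a
// call that takes the swifterror slot, this roughly produces
//   %v0   = load %ErrorTy, ptr %alloca
//   %slot = <set-placeholder>(%v0)     ; call through a null function pointer
//   ... original call, now passing %slot ...
//   %v1   = <get-placeholder>()        ; call through a null function pointer
//   store %ErrorTy %v1, ptr %alloca
// The placeholder calls are recorded in Shape.SwiftErrorOps and are fixed up
// into real swifterror handling when the coroutine is split.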
2567
2568/// Eliminate a formerly-swifterror alloca by inserting the get/set
2569/// intrinsics and attempting to MemToReg the alloca away.
2570static void eliminateSwiftErrorAlloca(Function &F, AllocaInst *Alloca,
2571 coro::Shape &Shape) {
2572 for (Use &Use : llvm::make_early_inc_range(Alloca->uses())) {
2573 // swifterror values can only be used in very specific ways.
2574 // We take advantage of that here.
2575 auto User = Use.getUser();
2576 if (isa<LoadInst>(User) || isa<StoreInst>(User))
2577 continue;
2578
2579 assert(isa<CallInst>(User) || isa<InvokeInst>(User));
2580 auto Call = cast<Instruction>(User);
2581
2582 auto Addr = emitSetAndGetSwiftErrorValueAround(Call, Alloca, Shape);
2583
2584 // Use the returned slot address as the call argument.
2585 Use.set(Addr);
2586 }
2587
2588 // All the uses should be loads and stores now.
2589 assert(isAllocaPromotable(Alloca));
2590}
2591
2592/// "Eliminate" a swifterror argument by reducing it to the alloca case
2593/// and then loading and storing in the prologue and epilog.
2594///
2595/// The argument keeps the swifterror flag.
2596static void eliminateSwiftErrorArgument(Function &F, Argument &Arg,
2597 coro::Shape &Shape,
2598 SmallVectorImpl<AllocaInst*> &AllocasToPromote) {
2599 IRBuilder<> Builder(F.getEntryBlock().getFirstNonPHIOrDbg());
2600
2601 auto ArgTy = cast<PointerType>(Arg.getType());
2602 // swifterror arguments are required to have pointer-to-pointer type,
2603 // so create a pointer-typed alloca with opaque pointers.
2604 auto ValueTy = ArgTy->isOpaque() ? PointerType::getUnqual(F.getContext())
2605 : ArgTy->getNonOpaquePointerElementType();
2606
2607 // Reduce to the alloca case:
2608
2609 // Create an alloca and replace all uses of the arg with it.
2610 auto Alloca = Builder.CreateAlloca(ValueTy, ArgTy->getAddressSpace());
2611 Arg.replaceAllUsesWith(Alloca);
2612
2613 // Set an initial value in the alloca. swifterror is always null on entry.
2614 auto InitialValue = Constant::getNullValue(ValueTy);
2615 Builder.CreateStore(InitialValue, Alloca);
2616
2617 // Find all the suspends in the function and save and restore around them.
2618 for (auto *Suspend : Shape.CoroSuspends) {
2619 (void) emitSetAndGetSwiftErrorValueAround(Suspend, Alloca, Shape);
2620 }
2621
2622 // Find all the coro.ends in the function and restore the error value.
2623 for (auto *End : Shape.CoroEnds) {
2624 Builder.SetInsertPoint(End);
2625 auto FinalValue = Builder.CreateLoad(ValueTy, Alloca);
2626 (void) emitSetSwiftErrorValue(Builder, FinalValue, Shape);
2627 }
2628
2629 // Now we can use the alloca logic.
2630 AllocasToPromote.push_back(Alloca);
2631 eliminateSwiftErrorAlloca(F, Alloca, Shape);
2632}
2633
2634/// Eliminate all problematic uses of swifterror arguments and allocas
2635/// from the function. We'll fix them up later when splitting the function.
2636static void eliminateSwiftError(Function &F, coro::Shape &Shape) {
2637 SmallVector<AllocaInst*, 4> AllocasToPromote;
2638
2639 // Look for a swifterror argument.
2640 for (auto &Arg : F.args()) {
2641 if (!Arg.hasSwiftErrorAttr()) continue;
2642
2643 eliminateSwiftErrorArgument(F, Arg, Shape, AllocasToPromote);
2644 break;
2645 }
2646
2647 // Look for swifterror allocas.
2648 for (auto &Inst : F.getEntryBlock()) {
2649 auto Alloca = dyn_cast<AllocaInst>(&Inst);
2650 if (!Alloca || !Alloca->isSwiftError()) continue;
2651
2652 // Clear the swifterror flag.
2653 Alloca->setSwiftError(false);
2654
2655 AllocasToPromote.push_back(Alloca);
2656 eliminateSwiftErrorAlloca(F, Alloca, Shape);
2657 }
2658
2659 // If we have any allocas to promote, compute a dominator tree and
2660 // promote them en masse.
2661 if (!AllocasToPromote.empty()) {
2662 DominatorTree DT(F);
2663 PromoteMemToReg(AllocasToPromote, DT);
2664 }
2665}
2666
2667/// retcon and retcon.once conventions assume that all spill uses can be sunk
2668/// after the coro.begin intrinsic.
2669static void sinkSpillUsesAfterCoroBegin(Function &F,
2670 const FrameDataInfo &FrameData,
2671 CoroBeginInst *CoroBegin) {
2672 DominatorTree Dom(F);
2673
2674 SmallSetVector<Instruction *, 32> ToMove;
2675 SmallVector<Instruction *, 32> Worklist;
2676
2677 // Collect all users that precede coro.begin.
2678 for (auto *Def : FrameData.getAllDefs()) {
2679 for (User *U : Def->users()) {
2680 auto Inst = cast<Instruction>(U);
2681 if (Inst->getParent() != CoroBegin->getParent() ||
2682 Dom.dominates(CoroBegin, Inst))
2683 continue;
2684 if (ToMove.insert(Inst))
2685 Worklist.push_back(Inst);
2686 }
2687 }
2688 // Recursively collect users before coro.begin.
2689 while (!Worklist.empty()) {
2690 auto *Def = Worklist.pop_back_val();
2691 for (User *U : Def->users()) {
2692 auto Inst = cast<Instruction>(U);
2693 if (Dom.dominates(CoroBegin, Inst))
2694 continue;
2695 if (ToMove.insert(Inst))
2696 Worklist.push_back(Inst);
2697 }
2698 }
2699
2700 // Sort by dominance.
2701 SmallVector<Instruction *, 64> InsertionList(ToMove.begin(), ToMove.end());
2702 llvm::sort(InsertionList, [&Dom](Instruction *A, Instruction *B) -> bool {
2703 // If a dominates b it should precede (<) b.
2704 return Dom.dominates(A, B);
2705 });
2706
2707 Instruction *InsertPt = CoroBegin->getNextNode();
2708 for (Instruction *Inst : InsertionList)
2709 Inst->moveBefore(InsertPt);
2710}
2711
2712/// For each local variable whose users are all contained inside one
2713/// suspended region, sink its lifetime.start markers to the block right
2714/// after the suspend block. Doing so minimizes the lifetime of each variable,
2715/// hence minimizing the amount of data we end up putting on the frame.
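// Editorial note (illustrative, not part of the original source): if an
// alloca's lifetime.start sits in the entry block but every real use lives in
// one resume region, the marker is re-emitted at the end of the block that
// follows the suspend and the original markers are erased, e.g.
//   entry:         call void @llvm.lifetime.start.p0(i64 4, ptr %x)  ; removed
//   after.suspend: call void @llvm.lifetime.start.p0(i64 4, ptr %x)  ; added
// so %x is no longer considered live across the suspend point.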
2716static void sinkLifetimeStartMarkers(Function &F, coro::Shape &Shape,
2717 SuspendCrossingInfo &Checker) {
2718 if (F.hasOptNone())
2719 return;
2720
2721 DominatorTree DT(F);
2722
2723 // Collect all possible basic blocks which may dominate all uses of allocas.
2724 SmallPtrSet<BasicBlock*, 4> DomSet;
2725 DomSet.insert(&F.getEntryBlock());
2726 for (auto *CSI : Shape.CoroSuspends) {
2727 BasicBlock *SuspendBlock = CSI->getParent();
2728 assert(isSuspendBlock(SuspendBlock) && SuspendBlock->getSingleSuccessor() &&
2729 "should have split coro.suspend into its own block");
2730 DomSet.insert(SuspendBlock->getSingleSuccessor());
2731 }
2732
2733 for (Instruction &I : instructions(F)) {
2734 AllocaInst* AI = dyn_cast<AllocaInst>(&I);
2735 if (!AI)
2736 continue;
2737
2738 for (BasicBlock *DomBB : DomSet) {
2739 bool Valid = true;
2740 SmallVector<Instruction *, 1> Lifetimes;
2741
2742 auto isLifetimeStart = [](Instruction* I) {
2743 if (auto* II = dyn_cast<IntrinsicInst>(I))
2744 return II->getIntrinsicID() == Intrinsic::lifetime_start;
2745 return false;
2746 };
2747
2748 auto collectLifetimeStart = [&](Instruction *U, AllocaInst *AI) {
2749 if (isLifetimeStart(U)) {
2750 Lifetimes.push_back(U);
2751 return true;
2752 }
2753 if (!U->hasOneUse() || U->stripPointerCasts() != AI)
2754 return false;
2755 if (isLifetimeStart(U->user_back())) {
2756 Lifetimes.push_back(U->user_back());
2757 return true;
2758 }
2759 return false;
2760 };
2761
2762 for (User *U : AI->users()) {
2763 Instruction *UI = cast<Instruction>(U);
2764 // For all users except lifetime.start markers, if they are all
2765 // dominated by one of the basic blocks and do not cross
2766 // suspend points as well, then there is no need to spill the
2767 // instruction.
2768 if (!DT.dominates(DomBB, UI->getParent()) ||
2769 Checker.isDefinitionAcrossSuspend(DomBB, UI)) {
2770 // Skip lifetime.start, GEP and bitcast used by lifetime.start
2771 // markers.
2772 if (collectLifetimeStart(UI, AI))
2773 continue;
2774 Valid = false;
2775 break;
2776 }
2777 }
2778 // Sink lifetime.start markers to the dominating block when they are
2779 // only used outside the region.
2780 if (Valid && Lifetimes.size() != 0) {
2781 // May be AI itself, when the type of AI is i8*
2782 auto *NewBitCast = [&](AllocaInst *AI) -> Value* {
2783 if (isa<AllocaInst>(Lifetimes[0]->getOperand(1)))
2784 return AI;
2785 auto *Int8PtrTy = Type::getInt8PtrTy(F.getContext());
2786 return CastInst::Create(Instruction::BitCast, AI, Int8PtrTy, "",
2787 DomBB->getTerminator());
2788 }(AI);
2789
2790 auto *NewLifetime = Lifetimes[0]->clone();
2791 NewLifetime->replaceUsesOfWith(NewLifetime->getOperand(1), NewBitCast);
2792 NewLifetime->insertBefore(DomBB->getTerminator());
2793
2794 // The original lifetime.start markers are no longer necessary.
2795 for (Instruction *S : Lifetimes)
2796 S->eraseFromParent();
2797
2798 break;
2799 }
2800 }
2801 }
2802}
2803
2804static void collectFrameAlloca(AllocaInst *AI, coro::Shape &Shape,
2805 const SuspendCrossingInfo &Checker,
2806 SmallVectorImpl<AllocaInfo> &Allocas,
2807 const DominatorTree &DT) {
2808 if (Shape.CoroSuspends.empty())
2809 return;
2810
2811 // The PromiseAlloca will be specially handled since it needs to be in a
2812 // fixed position in the frame.
2813 if (AI == Shape.SwitchLowering.PromiseAlloca)
2814 return;
2815
2816 // The code that uses the lifetime.start intrinsic does not work for
2817 // functions with loops that have no exit. Disable it on ABIs we know to
2818 // generate such code.
2819 bool ShouldUseLifetimeStartInfo =
2820 (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
2821 Shape.ABI != coro::ABI::RetconOnce);
2822 AllocaUseVisitor Visitor{AI->getModule()->getDataLayout(), DT,
2823 *Shape.CoroBegin, Checker,
2824 ShouldUseLifetimeStartInfo};
2825 Visitor.visitPtr(*AI);
2826 if (!Visitor.getShouldLiveOnFrame())
2827 return;
2828 Allocas.emplace_back(AI, Visitor.getAliasesCopy(),
2829 Visitor.getMayWriteBeforeCoroBegin());
2830}
2831
2832void coro::salvageDebugInfo(
2833 SmallDenseMap<Argument *, AllocaInst *, 4> &ArgToAllocaMap,
2834 DbgVariableIntrinsic *DVI, bool OptimizeFrame) {
2835 Function *F = DVI->getFunction();
2836 IRBuilder<> Builder(F->getContext());
2837 auto InsertPt = F->getEntryBlock().getFirstInsertionPt();
2838 while (isa<IntrinsicInst>(InsertPt))
2839 ++InsertPt;
2840 Builder.SetInsertPoint(&F->getEntryBlock(), InsertPt);
2841 DIExpression *Expr = DVI->getExpression();
2842 // Follow the pointer arithmetic all the way to the incoming
2843 // function argument and convert into a DIExpression.
2844 bool SkipOutermostLoad = !isa<DbgValueInst>(DVI);
2845 Value *Storage = DVI->getVariableLocationOp(0);
2846 Value *OriginalStorage = Storage;
2847
2848 while (auto *Inst = dyn_cast_or_null<Instruction>(Storage)) {
2849 if (auto *LdInst = dyn_cast<LoadInst>(Inst)) {
2850 Storage = LdInst->getPointerOperand();
2851 // FIXME: This is a heuristic that works around the fact that
2852 // LLVM IR debug intrinsics cannot yet distinguish between
2853 // memory and value locations: Because a dbg.declare(alloca) is
2854 // implicitly a memory location no DW_OP_deref operation for the
2855 // last direct load from an alloca is necessary. This condition
2856 // effectively drops the *last* DW_OP_deref in the expression.
2857 if (!SkipOutermostLoad)
2858 Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
2859 } else if (auto *StInst = dyn_cast<StoreInst>(Inst)) {
2860 Storage = StInst->getValueOperand();
2861 } else {
2862 SmallVector<uint64_t, 16> Ops;
2863 SmallVector<Value *, 0> AdditionalValues;
2864 Value *Op = llvm::salvageDebugInfoImpl(
2865 *Inst, Expr ? Expr->getNumLocationOperands() : 0, Ops,
2866 AdditionalValues);
2867 if (!Op || !AdditionalValues.empty()) {
2868 // If salvaging failed or salvaging produced more than one location
2869 // operand, give up.
2870 break;
2871 }
2872 Storage = Op;
2873 Expr = DIExpression::appendOpsToArg(Expr, Ops, 0, /*StackValue*/ false);
2874 }
2875 SkipOutermostLoad = false;
2876 }
2877 if (!Storage)
2878 return;
2879
2880 auto *StorageAsArg = dyn_cast<Argument>(Storage);
2881 const bool IsSwiftAsyncArg =
2882 StorageAsArg && StorageAsArg->hasAttribute(Attribute::SwiftAsync);
2883
2884 // Swift async arguments are described by an entry value of the ABI-defined
2885 // register containing the coroutine context.
2886 if (IsSwiftAsyncArg && !Expr->isEntryValue())
2887 Expr = DIExpression::prepend(Expr, DIExpression::EntryValue);
2888
2889 // If the coroutine frame is an Argument, store it in an alloca to improve
2890 // its availability (e.g. registers may be clobbered).
2891 // Avoid this if optimizations are enabled (they would remove the alloca) or
2892 // if the value is guaranteed to be available through other means (e.g. swift
2893 // ABI guarantees).
2894 if (StorageAsArg && !OptimizeFrame && !IsSwiftAsyncArg) {
2895 auto &Cached = ArgToAllocaMap[StorageAsArg];
2896 if (!Cached) {
2897 Cached = Builder.CreateAlloca(Storage->getType(), 0, nullptr,
2898 Storage->getName() + ".debug");
2899 Builder.CreateStore(Storage, Cached);
2900 }
2901 Storage = Cached;
2902 // FIXME: LLVM lacks nuanced semantics to differentiate between
2903 // memory and direct locations at the IR level. The backend will
2904 // turn a dbg.declare(alloca, ..., DIExpression()) into a memory
2905 // location. Thus, if there are deref and offset operations in the
2906 // expression, we need to add a DW_OP_deref at the *start* of the
2907 // expression to first load the contents of the alloca before
2908 // adjusting it with the expression.
2909 Expr = DIExpression::prepend(Expr, DIExpression::DerefBefore);
2910 }
2911
2912 DVI->replaceVariableLocationOp(OriginalStorage, Storage);
2913 DVI->setExpression(Expr);
2914 // We only hoist dbg.declare today, since it doesn't make sense to hoist
2915 // dbg.value: it does not have the same function-wide guarantees that
2916 // dbg.declare does.
2917 if (isa<DbgDeclareInst>(DVI)) {
2918 Instruction *InsertPt = nullptr;
2919 if (auto *I = dyn_cast<Instruction>(Storage))
2920 InsertPt = I->getInsertionPointAfterDef();
2921 else if (isa<Argument>(Storage))
2922 InsertPt = &*F->getEntryBlock().begin();
2923 if (InsertPt)
2924 DVI->moveBefore(InsertPt);
2925 }
2926}
2927
2928static void doRematerializations(
2929 Function &F, SuspendCrossingInfo &Checker,
2930 const std::function<bool(Instruction &)> &MaterializableCallback) {
2931 if (F.hasOptNone())
2932 return;
2933
2934 SpillInfo Spills;
2935
2936 // See if there are materializable instructions across suspend points
2937 // We record these as the starting point to also identify materializable
2938 // defs of uses in these operations
2939 for (Instruction &I : instructions(F)) {
2940 if (!MaterializableCallback(I))
2941 continue;
2942 for (User *U : I.users())
2943 if (Checker.isDefinitionAcrossSuspend(I, U))
2944 Spills[&I].push_back(cast<Instruction>(U));
2945 }
2946
2947 // Process each of the identified rematerializable instructions
2948 // and add predecessor instructions that can also be rematerialized.
2949 // This is actually a graph of instructions since we could potentially
2950 // have multiple uses of a def in the set of predecessor instructions.
2951 // The approach here is to maintain a graph of instructions for each bottom
2952 // level instruction - where we have a unique set of instructions (nodes)
2953 // and edges between them. We then walk the graph in reverse post-order
2954 // to insert them past the suspend point, but ensure that ordering is
2955 // correct. We also rely on CSE removing duplicate defs for remats of
2956 // different instructions with a def in common (rather than maintaining more
2957 // complex graphs for each suspend point)
2958
2959 // We can do this by adding new nodes to the list for each suspend
2960 // point. Then using standard GraphTraits to give a reverse post-order
2961 // traversal when we insert the nodes after the suspend
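// Editorial note (illustrative, not part of the original source): for a chain
// such as
//   %a = add i64 %n, 8                    ; materializable
//   %b = getelementptr i8, ptr %p, i64 %a ; materializable
//   <suspend>
//   use of %b
// a single RematGraph rooted at the use records both %a and %b, and the
// reverse post-order walk in rewriteMaterializableInstructions re-clones %a
// before %b at the resume point (provided their operands are available there),
// so operand dependencies stay satisfied.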
2962 SmallMapVector<Instruction *, std::unique_ptr<RematGraph>, 8> AllRemats;
2963 for (auto &E : Spills) {
2964 for (Instruction *U : E.second) {
2965 // Don't process a user twice (this can happen if the instruction uses
2966 // more than one rematerializable def)
2967 if (AllRemats.count(U))
2968 continue;
2969
2970 // Constructor creates the whole RematGraph for the given Use
2971 auto RematUPtr =
2972 std::make_unique<RematGraph>(MaterializableCallback, U, Checker);
2973
2974 LLVM_DEBUG(dbgs() << "***** Next remat group *****\n";
2975 ReversePostOrderTraversal<RematGraph *> RPOT(RematUPtr.get());
2976 for (auto I = RPOT.begin(); I != RPOT.end();
2977 ++I) { (*I)->Node->dump(); } dbgs()
2978 << "\n";);
2979
2980 AllRemats[U] = std::move(RematUPtr);
2981 }
2982 }
2983
2984 // Rewrite materializable instructions to be materialized at the use
2985 // point.
2986 LLVM_DEBUG(dumpRemats("Materializations", AllRemats));
2987 rewriteMaterializableInstructions(AllRemats);
2988}
2989
2990void coro::buildCoroutineFrame(
2991 Function &F, Shape &Shape,
2992 const std::function<bool(Instruction &)> &MaterializableCallback) {
2993 // Don't eliminate swifterror in async functions that won't be split.
2994 if (Shape.ABI != coro::ABI::Async || !Shape.CoroSuspends.empty())
2995 eliminateSwiftError(F, Shape);
2996
2997 if (Shape.ABI == coro::ABI::Switch &&
2998 Shape.SwitchLowering.PromiseAlloca) {
2999 Shape.getSwitchCoroId()->clearPromise();
3000 }
3001
3002 // Make sure that all coro.save, coro.suspend and the fallthrough coro.end
3003 // intrinsics are in their own blocks to simplify the logic of building up
3004 // SuspendCrossing data.
3005 for (auto *CSI : Shape.CoroSuspends) {
3006 if (auto *Save = CSI->getCoroSave())
3007 splitAround(Save, "CoroSave");
3008 splitAround(CSI, "CoroSuspend");
3009 }
3010
3011 // Put CoroEnds into their own blocks.
3012 for (AnyCoroEndInst *CE : Shape.CoroEnds) {
3013 splitAround(CE, "CoroEnd");
3014
3015 // Emit the musttail call function in a new block before the CoroEnd.
3016 // We do this here so that the right suspend crossing info is computed for
3017 // the uses of the musttail call function call. (Arguments to the coro.end
3018 // instructions would be ignored)
3019 if (auto *AsyncEnd = dyn_cast<CoroAsyncEndInst>(CE)) {
3020 auto *MustTailCallFn = AsyncEnd->getMustTailCallFunction();
3021 if (!MustTailCallFn)
3022 continue;
3023 IRBuilder<> Builder(AsyncEnd);
3024 SmallVector<Value *, 8> Args(AsyncEnd->args());
3025 auto Arguments = ArrayRef<Value *>(Args).drop_front(3);
3026 auto *Call = createMustTailCall(AsyncEnd->getDebugLoc(), MustTailCallFn,
3027 Arguments, Builder);
3028 splitAround(Call, "MustTailCall.Before.CoroEnd");
3029 }
3030 }
3031
3032 // Later code makes structural assumptions about single-predecessor PHIs,
3033 // e.g. that they are not live across a suspend point.
3034 cleanupSinglePredPHIs(F);
3035
3036 // Transform multi-edge PHI Nodes, so that any value feeding into a PHI will
3037 // never have its definition separated from the PHI by the suspend point.
3038 rewritePHIs(F);
3039
3040 // Build suspend crossing info.
3041 SuspendCrossingInfo Checker(F, Shape);
3042
3043 doRematerializations(F, Checker, MaterializableCallback);
3044
3045 FrameDataInfo FrameData;
3046 SmallVector<CoroAllocaAllocInst*, 4> LocalAllocas;
3047 SmallVector<Instruction*, 4> DeadInstructions;
3048 if (Shape.ABI != coro::ABI::Async && Shape.ABI != coro::ABI::Retcon &&
3049 Shape.ABI != coro::ABI::RetconOnce)
3050 sinkLifetimeStartMarkers(F, Shape, Checker);
3051
3052 // Collect the spills for arguments and other not-materializable values.
3053 for (Argument &A : F.args())
3054 for (User *U : A.users())
3055 if (Checker.isDefinitionAcrossSuspend(A, U))
3056 FrameData.Spills[&A].push_back(cast<Instruction>(U));
3057
3058 const DominatorTree DT(F);
3059 for (Instruction &I : instructions(F)) {
3060 // Values returned from coroutine structure intrinsics should not be part
3061 // of the Coroutine Frame.
3062 if (isCoroutineStructureIntrinsic(I) || &I == Shape.CoroBegin)
3063 continue;
3064
3065 // Handle alloca.alloc specially here.
3066 if (auto AI = dyn_cast<CoroAllocaAllocInst>(&I)) {
3067 // Check whether the alloca's lifetime is bounded by suspend points.
3068 if (isLocalAlloca(AI)) {
3069 LocalAllocas.push_back(AI);
3070 continue;
3071 }
3072
3073 // If not, do a quick rewrite of the alloca and then add spills of
3074 // the rewritten value. The rewrite doesn't invalidate anything in
3075 // Spills because the other alloca intrinsics have no other operands
3076 // besides AI, and it doesn't invalidate the iteration because we delay
3077 // erasing AI.
3078 auto Alloc = lowerNonLocalAlloca(AI, Shape, DeadInstructions);
3079
3080 for (User *U : Alloc->users()) {
3081 if (Checker.isDefinitionAcrossSuspend(*Alloc, U))
3082 FrameData.Spills[Alloc].push_back(cast<Instruction>(U));
3083 }
3084 continue;
3085 }
3086
3087 // Ignore alloca.get; we process this as part of coro.alloca.alloc.
3088 if (isa<CoroAllocaGetInst>(I))
3089 continue;
3090
3091 if (auto *AI = dyn_cast<AllocaInst>(&I)) {
3092 collectFrameAlloca(AI, Shape, Checker, FrameData.Allocas, DT);
3093 continue;
3094 }
3095
3096 for (User *U : I.users())
3097 if (Checker.isDefinitionAcrossSuspend(I, U)) {
3098 // We cannot spill a token.
3099 if (I.getType()->isTokenTy())
3100 report_fatal_error(
3101 "token definition is separated from the use by a suspend point");
3102 FrameData.Spills[&I].push_back(cast<Instruction>(U));
3103 }
3104 }
3105
3106 LLVM_DEBUG(dumpAllocas(FrameData.Allocas));
3107
3108 // We don't want the layout of the coroutine frame to be affected
3109 // by debug information, so we only choose to salvage DbgValueInsts
3110 // whose value is already in the frame.
3111 // The dbg.values for allocas are handled specially.
3112 for (auto &Iter : FrameData.Spills) {
3113 auto *V = Iter.first;
3114 SmallVector<DbgValueInst *, 16> DVIs;
3115 findDbgValues(DVIs, V);
3116 for (DbgValueInst *DVI : DVIs)
3117 if (Checker.isDefinitionAcrossSuspend(*V, DVI))
3118 FrameData.Spills[V].push_back(DVI);
3119 }
3120
3121 LLVM_DEBUG(dumpSpills("Spills", FrameData.Spills));
3122 if (Shape.ABI == coro::ABI::Retcon || Shape.ABI == coro::ABI::RetconOnce ||
3123 Shape.ABI == coro::ABI::Async)
3124 sinkSpillUsesAfterCoroBegin(F, FrameData, Shape.CoroBegin);
3125 Shape.FrameTy = buildFrameType(F, Shape, FrameData);
3126 createFramePtr(Shape);
3127 // For now, this works for C++ programs only.
3128 buildFrameDebugInfo(F, Shape, FrameData);
3129 insertSpills(FrameData, Shape);
3130 lowerLocalAllocas(LocalAllocas, DeadInstructions);
3131
3132 for (auto *I : DeadInstructions)
3133 I->eraseFromParent();
3134}
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
AMDGPU Lower Kernel Arguments
assume Assume Builder
This file implements the BitVector class.
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492