LLVM 23.0.0git
StackColoring.cpp
Go to the documentation of this file.
1//===- StackColoring.cpp --------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the stack-coloring optimization that looks for
10// lifetime markers machine instructions (LIFETIME_START and LIFETIME_END),
11// which represent the possible lifetime of stack slots. It attempts to
12// merge disjoint stack slots and reduce the used stack space.
13// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
14//
15// TODO: In the future we plan to improve stack coloring in the following ways:
16// 1. Allow merging multiple small slots into a single larger slot at different
17// offsets.
18// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
19// spill slots.
20//
21//===----------------------------------------------------------------------===//
22
24#include "llvm/ADT/BitVector.h"
25#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/Statistic.h"
39#include "llvm/CodeGen/Passes.h"
44#include "llvm/Config/llvm-config.h"
45#include "llvm/IR/Constants.h"
48#include "llvm/IR/Metadata.h"
49#include "llvm/IR/Use.h"
50#include "llvm/IR/Value.h"
52#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
58#include <algorithm>
59#include <cassert>
60#include <limits>
61#include <memory>
62#include <utility>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "stack-coloring"
67
68static cl::opt<bool>
69DisableColoring("no-stack-coloring",
70 cl::init(false), cl::Hidden,
71 cl::desc("Disable stack coloring"));
72
73/// The user may write code that uses allocas outside of the declared lifetime
74/// zone. This can happen when the user returns a reference to a local
75/// data-structure. We can detect these cases and decide not to optimize the
76/// code. If this flag is enabled, we try to save the user. This option
77/// is treated as overriding LifetimeStartOnFirstUse below.
78static cl::opt<bool>
79ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80 cl::init(false), cl::Hidden,
81 cl::desc("Do not optimize lifetime zones that "
82 "are broken"));
83
84/// Enable enhanced dataflow scheme for lifetime analysis (treat first
85/// use of stack slot as start of slot lifetime, as opposed to looking
86/// for LIFETIME_START marker). See "Implementation notes" below for
87/// more info.
88static cl::opt<bool>
89LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90 cl::init(true), cl::Hidden,
91 cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
92
93
94STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
95STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
96STATISTIC(StackSlotMerged, "Number of stack slot merged.");
97STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
98
99//===----------------------------------------------------------------------===//
100// StackColoring Pass
101//===----------------------------------------------------------------------===//
102//
103// Stack Coloring reduces stack usage by merging stack slots when they
104// can't be used together. For example, consider the following C program:
105//
106// void bar(char *, int);
107// void foo(bool var) {
108// A: {
109// char z[4096];
110// bar(z, 0);
111// }
112//
113// char *p;
114// char x[4096];
115// char y[4096];
116// if (var) {
117// p = x;
118// } else {
119// bar(y, 1);
120// p = y + 1024;
121// }
122// B:
123// bar(p, 2);
124// }
125//
126// Naively-compiled, this program would use 12k of stack space. However, the
127// stack slot corresponding to `z` is always destroyed before either of the
128// stack slots for `x` or `y` are used, and then `x` is only used if `var`
129// is true, while `y` is only used if `var` is false. So in no time are 2
130// of the stack slots used together, and therefore we can merge them,
131// compiling the function using only a single 4k alloca:
132//
133// void foo(bool var) { // equivalent
134// char x[4096];
135// char *p;
136// bar(x, 0);
137// if (var) {
138// p = x;
139// } else {
140// bar(x, 1);
141// p = x + 1024;
142// }
143// bar(p, 2);
144// }
145//
146// This is an important optimization if we want stack space to be under
147// control in large functions, both open-coded ones and ones created by
148// inlining.
149//
150// Implementation Notes:
151// ---------------------
152//
153// An important part of the above reasoning is that `z` can't be accessed
154// while the latter 2 calls to `bar` are running. This is justified because
155// `z`'s lifetime is over after we exit from block `A:`, so any further
156// accesses to it would be UB. The way we represent this information
157// in LLVM is by having frontends delimit blocks with `lifetime.start`
158// and `lifetime.end` intrinsics.
159//
160// The effect of these intrinsics seems to be as follows (maybe I should
161// specify this in the reference?):
162//
163// L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164// lifetime intrinsic refers to that stack slot, in which case
165// it is marked as *in-scope*.
166// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167// the stack slot is overwritten with `undef`.
168// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169// L4) on function exit, all stack slots are marked as *out-of-scope*.
170// L5) `lifetime.end` is a no-op when called on a slot that is already
171// *out-of-scope*.
172// L6) memory accesses to *out-of-scope* stack slots are UB.
173// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174// are invalidated, unless the slot is "degenerate". This is used to
175// justify not marking slots as in-use until the pointer to them is
176// used, but feels a bit hacky in the presence of things like LICM. See
177// the "Degenerate Slots" section for more details.
178//
179// Now, let's ground stack coloring on these rules. We'll define a slot
180// as *in-use* at a (dynamic) point in execution if it either can be
181// written to at that point, or if it has a live and non-undef content
182// at that point.
183//
184// Obviously, slots that are never *in-use* together can be merged, and
185// in our example `foo`, the slots for `x`, `y` and `z` are never
186// in-use together (of course, sometimes slots that *are* in-use together
187// might still be mergable, but we don't care about that here).
188//
189// In this implementation, we successively merge pairs of slots that are
190// not *in-use* together. We could be smarter - for example, we could merge
191// a single large slot with 2 small slots, or we could construct the
192// interference graph and run a "smart" graph coloring algorithm, but with
193// that aside, how do we find out whether a pair of slots might be *in-use*
194// together?
195//
196// From our rules, we see that *out-of-scope* slots are never *in-use*,
197// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198// until their address is taken. Therefore, we can approximate slot activity
199// using dataflow.
200//
201// A subtle point: naively, we might try to figure out which pairs of
202// stack-slots interfere by propagating `S in-use` through the CFG for every
203// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204// which they are both *in-use*.
205//
206// That is sound, but overly conservative in some cases: in our (artificial)
207// example `foo`, either `x` or `y` might be in use at the label `B:`, but
208// as `x` is only in use if we came in from the `var` edge and `y` only
209// if we came from the `!var` edge, they still can't be in use together.
210// See PR32488 for an important real-life case.
211//
212// If we wanted to find all points of interference precisely, we could
213// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214// would be precise, but requires propagating `O(n^2)` dataflow facts.
215//
216// However, we aren't interested in the *set* of points of interference
217// between 2 stack slots, only *whether* there *is* such a point. So we
218// can rely on a little trick: for `S` and `T` to be in-use together,
219// one of them needs to become in-use while the other is in-use (or
220// they might both become in use simultaneously). We can check this
221// by also keeping track of the points at which a stack slot might *start*
222// being in-use.
223//
224// Exact first use:
225// ----------------
226//
227// Consider the following motivating example:
228//
229// int foo() {
230// char b1[1024], b2[1024];
231// if (...) {
232// char b3[1024];
233// <uses of b1, b3>;
234// return x;
235// } else {
236// char b4[1024], b5[1024];
237// <uses of b2, b4, b5>;
238// return y;
239// }
240// }
241//
242// In the code above, "b3" and "b4" are declared in distinct lexical
243// scopes, meaning that it is easy to prove that they can share the
244// same stack slot. Variables "b1" and "b2" are declared in the same
245// scope, meaning that from a lexical point of view, their lifetimes
246// overlap. From a control flow pointer of view, however, the two
247// variables are accessed in disjoint regions of the CFG, thus it
248// should be possible for them to share the same stack slot. An ideal
249// stack allocation for the function above would look like:
250//
251// slot 0: b1, b2
252// slot 1: b3, b4
253// slot 2: b5
254//
255// Achieving this allocation is tricky, however, due to the way
256// lifetime markers are inserted. Here is a simplified view of the
257// control flow graph for the code above:
258//
259// +------ block 0 -------+
260// 0| LIFETIME_START b1, b2 |
261// 1| <test 'if' condition> |
262// +-----------------------+
263// ./ \.
264// +------ block 1 -------+ +------ block 2 -------+
265// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
266// 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
267// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
268// +-----------------------+ +-----------------------+
269// \. /.
270// +------ block 3 -------+
271// 8| <cleanupcode> |
272// 9| LIFETIME_END b1, b2 |
273// 10| return |
274// +-----------------------+
275//
276// If we create live intervals for the variables above strictly based
277// on the lifetime markers, we'll get the set of intervals on the
278// left. If we ignore the lifetime start markers and instead treat a
279// variable's lifetime as beginning with the first reference to the
280// var, then we get the intervals on the right.
281//
282// LIFETIME_START First Use
283// b1: [0,9] [3,4] [8,9]
284// b2: [0,9] [6,9]
285// b3: [2,4] [3,4]
286// b4: [5,7] [6,7]
287// b5: [5,7] [6,7]
288//
289// For the intervals on the left, the best we can do is overlap two
290// variables (b3 and b4, for example); this gives us a stack size of
291// 4*1024 bytes, not ideal. When treating first-use as the start of a
292// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293// byte stack (better).
294//
295// Degenerate Slots:
296// -----------------
297//
298// Relying entirely on first-use of stack slots is problematic,
299// however, due to the fact that optimizations can sometimes migrate
300// uses of a variable outside of its lifetime start/end region. Here
301// is an example:
302//
303// int bar() {
304// char b1[1024], b2[1024];
305// if (...) {
306// <uses of b2>
307// return y;
308// } else {
309// <uses of b1>
310// while (...) {
311// char b3[1024];
312// <uses of b3>
313// }
314// }
315// }
316//
317// Before optimization, the control flow graph for the code above
318// might look like the following:
319//
320// +------ block 0 -------+
321// 0| LIFETIME_START b1, b2 |
322// 1| <test 'if' condition> |
323// +-----------------------+
324// ./ \.
325// +------ block 1 -------+ +------- block 2 -------+
326// 2| <uses of b2> | 3| <uses of b1> |
327// +-----------------------+ +-----------------------+
328// | |
329// | +------- block 3 -------+ <-\.
330// | 4| <while condition> | |
331// | +-----------------------+ |
332// | / | |
333// | / +------- block 4 -------+
334// \ / 5| LIFETIME_START b3 | |
335// \ / 6| <uses of b3> | |
336// \ / 7| LIFETIME_END b3 | |
337// \ | +------------------------+ |
338// \ | \ /
339// +------ block 5 -----+ \---------------
340// 8| <cleanupcode> |
341// 9| LIFETIME_END b1, b2 |
342// 10| return |
343// +---------------------+
344//
345// During optimization, however, it can happen that an instruction
346// computing an address in "b3" (for example, a loop-invariant GEP) is
347// hoisted up out of the loop from block 4 to block 2. [Note that
348// this is not an actual load from the stack, only an instruction that
349// computes the address to be loaded]. If this happens, there is now a
350// path leading from the first use of b3 to the return instruction
351// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352// now larger than if we were computing live intervals strictly based
353// on lifetime markers. In the example above, this lengthened lifetime
354// would mean that it would appear illegal to overlap b3 with b2.
355//
356// To deal with this such cases, the code in ::collectMarkers() below
357// tries to identify "degenerate" slots -- those slots where on a single
358// forward pass through the CFG we encounter a first reference to slot
359// K before we hit the slot K lifetime start marker. For such slots,
360// we fall back on using the lifetime start marker as the beginning of
361// the variable's lifetime. NB: with this implementation, slots can
362// appear degenerate in cases where there is unstructured control flow:
363//
364// if (q) goto mid;
365// if (x > 9) {
366// int b[100];
367// memcpy(&b[0], ...);
368// mid: b[k] = ...;
369// abc(&b);
370// }
371//
372// If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
373// before visiting the memcpy block (which will contain the lifetime start
374// for "b" then it will appear that 'b' has a degenerate lifetime.
375
376namespace {
377
378/// StackColoring - A machine pass for merging disjoint stack allocations,
379/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
380class StackColoring {
381 MachineFrameInfo *MFI = nullptr;
382 MachineFunction *MF = nullptr;
383
384 /// A class representing liveness information for a single basic block.
385 /// Each bit in the BitVector represents the liveness property
386 /// for a different stack slot.
387 struct BlockLifetimeInfo {
388 /// Which slots BEGINs in each basic block.
389 BitVector Begin;
390
391 /// Which slots ENDs in each basic block.
392 BitVector End;
393
394 /// Which slots are marked as LIVE_IN, coming into each basic block.
395 BitVector LiveIn;
396
397 /// Which slots are marked as LIVE_OUT, coming out of each basic block.
398 BitVector LiveOut;
399 };
400
401 /// Maps active slots (per bit) for each basic block.
402 using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
403 LivenessMap BlockLiveness;
404
405 /// Maps serial numbers to basic blocks.
406 DenseMap<const MachineBasicBlock *, int> BasicBlocks;
407
408 /// Maps basic blocks to a serial number.
410
411 /// Maps slots to their use interval. Outside of this interval, slots
412 /// values are either dead or `undef` and they will not be written to.
414
415 /// Maps slots to the points where they can become in-use.
417
418 /// VNInfo is used for the construction of LiveIntervals.
419 VNInfo::Allocator VNInfoAllocator;
420
421 /// SlotIndex analysis object.
422 SlotIndexes *Indexes = nullptr;
423
424 /// The list of lifetime markers found. These markers are to be removed
425 /// once the coloring is done.
426 SmallVector<MachineInstr*, 8> Markers;
427
428 /// Record the FI slots for which we have seen some sort of
429 /// lifetime marker (either start or end).
430 BitVector InterestingSlots;
431
432 /// FI slots that need to be handled conservatively (for these
433 /// slots lifetime-start-on-first-use is disabled).
434 BitVector ConservativeSlots;
435
436 /// Number of iterations taken during data flow analysis.
437 unsigned NumIterations;
438
439public:
440 StackColoring(SlotIndexes *Indexes) : Indexes(Indexes) {}
441 bool run(MachineFunction &Func, bool OnlyRemoveMarkers = false);
442
443private:
444 /// Used in collectMarkers
445 using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
446
447 /// Debug.
448 void dump() const;
449 void dumpIntervals() const;
450 void dumpBB(MachineBasicBlock *MBB) const;
451 void dumpBV(const char *tag, const BitVector &BV) const;
452
453 /// Removes all of the lifetime marker instructions from the function.
454 /// \returns true if any markers were removed.
455 bool removeAllMarkers();
456
457 /// Scan the machine function and find all of the lifetime markers.
458 /// Record the findings in the BEGIN and END vectors.
459 /// \returns the number of markers found.
460 unsigned collectMarkers(unsigned NumSlot);
461
462 /// Perform the dataflow calculation and calculate the lifetime for each of
463 /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
464 /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
465 /// in and out blocks.
466 void calculateLocalLiveness();
467
468 /// Returns TRUE if we're using the first-use-begins-lifetime method for
469 /// this slot (if FALSE, then the start marker is treated as start of lifetime).
470 bool applyFirstUse(int Slot) {
472 return false;
473 if (ConservativeSlots.test(Slot))
474 return false;
475 return true;
476 }
477
478 /// Examines the specified instruction and returns TRUE if the instruction
479 /// represents the start or end of an interesting lifetime. The slot or slots
480 /// starting or ending are added to the vector "slots" and "isStart" is set
481 /// accordingly.
482 /// \returns True if inst contains a lifetime start or end
483 bool isLifetimeStartOrEnd(const MachineInstr &MI,
484 SmallVector<int, 4> &slots,
485 bool &isStart);
486
487 /// Construct the LiveIntervals for the slots.
488 void calculateLiveIntervals(unsigned NumSlots);
489
490 /// Go over the machine function and change instructions which use stack
491 /// slots to use the joint slots.
492 void remapInstructions(DenseMap<int, int> &SlotRemap);
493
494 /// The input program may contain instructions which are not inside lifetime
495 /// markers. This can happen due to a bug in the compiler or due to a bug in
496 /// user code (for example, returning a reference to a local variable).
497 /// This procedure checks all of the instructions in the function and
498 /// invalidates lifetime ranges which do not contain all of the instructions
499 /// which access that frame slot.
500 void removeInvalidSlotRanges();
501
502 /// Map entries which point to other entries to their destination.
503 /// A->B->C becomes A->C.
504 void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
505};
506
507class StackColoringLegacy : public MachineFunctionPass {
508public:
509 static char ID;
510
511 StackColoringLegacy() : MachineFunctionPass(ID) {}
512
513 void getAnalysisUsage(AnalysisUsage &AU) const override;
514 bool runOnMachineFunction(MachineFunction &Func) override;
515};
516
517} // end anonymous namespace
518
519char StackColoringLegacy::ID = 0;
520
521char &llvm::StackColoringLegacyID = StackColoringLegacy::ID;
522
524 "Merge disjoint stack slots", false, false)
526INITIALIZE_PASS_END(StackColoringLegacy, DEBUG_TYPE,
527 "Merge disjoint stack slots", false, false)
528
529void StackColoringLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
530 AU.addRequired<SlotIndexesWrapperPass>();
532}
533
534#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
535LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
536 const BitVector &BV) const {
537 dbgs() << tag << " : { ";
538 for (unsigned I = 0, E = BV.size(); I != E; ++I)
539 dbgs() << BV.test(I) << " ";
540 dbgs() << "}\n";
541}
542
543LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
544 LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
545 assert(BI != BlockLiveness.end() && "Block not found");
546 const BlockLifetimeInfo &BlockInfo = BI->second;
547
548 dumpBV("BEGIN", BlockInfo.Begin);
549 dumpBV("END", BlockInfo.End);
550 dumpBV("LIVE_IN", BlockInfo.LiveIn);
551 dumpBV("LIVE_OUT", BlockInfo.LiveOut);
552}
553
554LLVM_DUMP_METHOD void StackColoring::dump() const {
555 for (MachineBasicBlock *MBB : depth_first(MF)) {
556 dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
557 << MBB->getName() << "]\n";
558 dumpBB(MBB);
559 }
560}
561
562LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
563 for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
564 dbgs() << "Interval[" << I << "]:\n";
565 Intervals[I]->dump();
566 }
567}
568#endif
569
570static inline int getStartOrEndSlot(const MachineInstr &MI)
571{
572 assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
573 MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
574 "Expected LIFETIME_START or LIFETIME_END op");
575 const MachineOperand &MO = MI.getOperand(0);
576 int Slot = MO.getIndex();
577 if (Slot >= 0)
578 return Slot;
579 return -1;
580}
581
582// At the moment the only way to end a variable lifetime is with
583// a VARIABLE_LIFETIME op (which can't contain a start). If things
584// change and the IR allows for a single inst that both begins
585// and ends lifetime(s), this interface will need to be reworked.
586bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
587 SmallVector<int, 4> &slots,
588 bool &isStart) {
589 if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
590 MI.getOpcode() == TargetOpcode::LIFETIME_END) {
592 if (Slot < 0)
593 return false;
594 if (!InterestingSlots.test(Slot))
595 return false;
596 slots.push_back(Slot);
597 if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
598 isStart = false;
599 return true;
600 }
601 if (!applyFirstUse(Slot)) {
602 isStart = true;
603 return true;
604 }
606 if (!MI.isDebugInstr()) {
607 bool found = false;
608 for (const MachineOperand &MO : MI.operands()) {
609 if (!MO.isFI())
610 continue;
611 int Slot = MO.getIndex();
612 if (Slot<0)
613 continue;
614 if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
615 slots.push_back(Slot);
616 found = true;
617 }
618 }
619 if (found) {
620 isStart = true;
621 return true;
622 }
623 }
624 }
625 return false;
626}
627
628unsigned StackColoring::collectMarkers(unsigned NumSlot) {
629 unsigned MarkersFound = 0;
630 BlockBitVecMap SeenStartMap;
631 InterestingSlots.clear();
632 InterestingSlots.resize(NumSlot);
633 ConservativeSlots.clear();
634 ConservativeSlots.resize(NumSlot);
635
636 // number of start and end lifetime ops for each slot
637 SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
638 SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
639
640 // Step 1: collect markers and populate the "InterestingSlots"
641 // and "ConservativeSlots" sets.
642 for (MachineBasicBlock *MBB : depth_first(MF)) {
643 // Compute the set of slots for which we've seen a START marker but have
644 // not yet seen an END marker at this point in the walk (e.g. on entry
645 // to this bb).
646 BitVector BetweenStartEnd;
647 BetweenStartEnd.resize(NumSlot);
648 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
649 BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred);
650 if (I != SeenStartMap.end()) {
651 BetweenStartEnd |= I->second;
652 }
653 }
654
655 // Walk the instructions in the block to look for start/end ops.
656 for (MachineInstr &MI : *MBB) {
657 if (MI.isDebugInstr())
658 continue;
659 if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
660 MI.getOpcode() == TargetOpcode::LIFETIME_END) {
662 if (Slot < 0)
663 continue;
664 InterestingSlots.set(Slot);
665 if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
666 BetweenStartEnd.set(Slot);
667 NumStartLifetimes[Slot] += 1;
668 } else {
669 BetweenStartEnd.reset(Slot);
670 NumEndLifetimes[Slot] += 1;
671 }
672 const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
673 if (Allocation) {
674 LLVM_DEBUG(dbgs() << "Found a lifetime ");
675 LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
676 ? "start"
677 : "end"));
678 LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
680 << " with allocation: " << Allocation->getName() << "\n");
681 }
682 Markers.push_back(&MI);
683 MarkersFound += 1;
684 } else {
685 for (const MachineOperand &MO : MI.operands()) {
686 if (!MO.isFI())
687 continue;
688 int Slot = MO.getIndex();
689 if (Slot < 0)
690 continue;
691 if (! BetweenStartEnd.test(Slot)) {
692 ConservativeSlots.set(Slot);
693 }
694 }
695 }
696 }
697 BitVector &SeenStart = SeenStartMap[MBB];
698 SeenStart |= BetweenStartEnd;
699 }
700 if (!MarkersFound) {
701 return 0;
702 }
703
704 // PR27903: slots with multiple start or end lifetime ops are not
705 // safe to enable for "lifetime-start-on-first-use".
706 for (unsigned slot = 0; slot < NumSlot; ++slot) {
707 if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
708 ConservativeSlots.set(slot);
709 }
710
711 // The write to the catch object by the personality function is not propely
712 // modeled in IR: It happens before any cleanuppads are executed, even if the
713 // first mention of the catch object is in a catchpad. As such, mark catch
714 // object slots as conservative, so they are excluded from first-use analysis.
715 if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
716 for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
717 for (WinEHHandlerType &H : TBME.HandlerArray)
718 if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
719 H.CatchObj.FrameIndex >= 0)
720 ConservativeSlots.set(H.CatchObj.FrameIndex);
721
722 // Treat all stack slots as conservative if we happen to have calls to
723 // setjmp/sigsetjmp, as longjmp may re-enter the function on a different path.
724 if (MF->exposesReturnsTwice())
725 ConservativeSlots.set();
726
727 LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
728
729 // Step 2: compute begin/end sets for each block
730
731 // NOTE: We use a depth-first iteration to ensure that we obtain a
732 // deterministic numbering.
733 for (MachineBasicBlock *MBB : depth_first(MF)) {
734 // Assign a serial number to this basic block.
735 BasicBlocks[MBB] = BasicBlockNumbering.size();
736 BasicBlockNumbering.push_back(MBB);
737
738 // Keep a reference to avoid repeated lookups.
739 BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
740
741 BlockInfo.Begin.resize(NumSlot);
742 BlockInfo.End.resize(NumSlot);
743
744 SmallVector<int, 4> slots;
745 for (MachineInstr &MI : *MBB) {
746 bool isStart = false;
747 slots.clear();
748 if (isLifetimeStartOrEnd(MI, slots, isStart)) {
749 if (!isStart) {
750 assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
751 int Slot = slots[0];
752 if (BlockInfo.Begin.test(Slot)) {
753 BlockInfo.Begin.reset(Slot);
754 }
755 BlockInfo.End.set(Slot);
756 } else {
757 for (auto Slot : slots) {
758 LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
760 << " at " << printMBBReference(*MBB) << " index ");
762 const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
763 if (Allocation) {
765 << " with allocation: " << Allocation->getName());
766 }
767 LLVM_DEBUG(dbgs() << "\n");
768 if (BlockInfo.End.test(Slot)) {
769 BlockInfo.End.reset(Slot);
770 }
771 BlockInfo.Begin.set(Slot);
772 }
773 }
774 }
775 }
776 }
777
778 // Update statistics.
779 NumMarkerSeen += MarkersFound;
780 return MarkersFound;
781}
782
783void StackColoring::calculateLocalLiveness() {
784 unsigned NumIters = 0;
785 bool changed = true;
786 // Create BitVector outside the loop and reuse them to avoid repeated heap
787 // allocations.
788 BitVector LocalLiveIn;
789 BitVector LocalLiveOut;
790 while (changed) {
791 changed = false;
792 ++NumIters;
793
794 for (const MachineBasicBlock *BB : BasicBlockNumbering) {
795 // Use an iterator to avoid repeated lookups.
796 LivenessMap::iterator BI = BlockLiveness.find(BB);
797 assert(BI != BlockLiveness.end() && "Block not found");
798 BlockLifetimeInfo &BlockInfo = BI->second;
799
800 // Compute LiveIn by unioning together the LiveOut sets of all preds.
801 LocalLiveIn.clear();
802 for (MachineBasicBlock *Pred : BB->predecessors()) {
803 LivenessMap::const_iterator I = BlockLiveness.find(Pred);
804 // PR37130: transformations prior to stack coloring can
805 // sometimes leave behind statically unreachable blocks; these
806 // can be safely skipped here.
807 if (I != BlockLiveness.end())
808 LocalLiveIn |= I->second.LiveOut;
809 }
810
811 // Compute LiveOut by subtracting out lifetimes that end in this
812 // block, then adding in lifetimes that begin in this block. If
813 // we have both BEGIN and END markers in the same basic block
814 // then we know that the BEGIN marker comes after the END,
815 // because we already handle the case where the BEGIN comes
816 // before the END when collecting the markers (and building the
817 // BEGIN/END vectors).
818 LocalLiveOut = LocalLiveIn;
819 LocalLiveOut.reset(BlockInfo.End);
820 LocalLiveOut |= BlockInfo.Begin;
821
822 // Update block LiveIn set, noting whether it has changed.
823 if (!LocalLiveIn.subsetOf(BlockInfo.LiveIn)) {
824 changed = true;
825 BlockInfo.LiveIn |= LocalLiveIn;
826 }
827
828 // Update block LiveOut set, noting whether it has changed.
829 if (!LocalLiveOut.subsetOf(BlockInfo.LiveOut)) {
830 changed = true;
831 BlockInfo.LiveOut |= LocalLiveOut;
832 }
833 }
834 } // while changed.
835
836 NumIterations = NumIters;
837}
838
839void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
841 SmallVector<bool, 16> DefinitelyInUse;
842
843 // For each block, find which slots are active within this block
844 // and update the live intervals.
845 for (const MachineBasicBlock &MBB : *MF) {
846 Starts.clear();
847 Starts.resize(NumSlots);
848 DefinitelyInUse.clear();
849 DefinitelyInUse.resize(NumSlots);
850
851 // Start the interval of the slots that we previously found to be 'in-use'.
852 BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
853 for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
854 pos = MBBLiveness.LiveIn.find_next(pos)) {
855 Starts[pos] = Indexes->getMBBStartIdx(&MBB);
856 }
857
858 // Create the interval for the basic blocks containing lifetime begin/end.
859 for (const MachineInstr &MI : MBB) {
860 SmallVector<int, 4> slots;
861 bool IsStart = false;
862 if (!isLifetimeStartOrEnd(MI, slots, IsStart))
863 continue;
864 SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
865 for (auto Slot : slots) {
866 if (IsStart) {
867 // If a slot is already definitely in use, we don't have to emit
868 // a new start marker because there is already a pre-existing
869 // one.
870 if (!DefinitelyInUse[Slot]) {
871 LiveStarts[Slot].push_back(ThisIndex);
872 DefinitelyInUse[Slot] = true;
873 }
874 if (!Starts[Slot].isValid())
875 Starts[Slot] = ThisIndex;
876 } else {
877 if (Starts[Slot].isValid()) {
878 VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
879 Intervals[Slot]->addSegment(
880 LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
881 Starts[Slot] = SlotIndex(); // Invalidate the start index
882 DefinitelyInUse[Slot] = false;
883 }
884 }
885 }
886 }
887
888 // Finish up started segments
889 for (unsigned i = 0; i < NumSlots; ++i) {
890 if (!Starts[i].isValid())
891 continue;
892
893 SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
894 VNInfo *VNI = Intervals[i]->getValNumInfo(0);
895 Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
896 }
897 }
898}
899
900bool StackColoring::removeAllMarkers() {
901 unsigned Count = 0;
902 for (MachineInstr *MI : Markers) {
903 MI->eraseFromParent();
904 Count++;
905 }
906 Markers.clear();
907
908 LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
909 return Count;
910}
911
912void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
913 unsigned FixedInstr = 0;
914 unsigned FixedMemOp = 0;
915 unsigned FixedDbg = 0;
916
917 // Remap debug information that refers to stack slots.
918 for (auto &VI : MF->getVariableDbgInfo()) {
919 if (!VI.Var || !VI.inStackSlot())
920 continue;
921 int Slot = VI.getStackSlot();
922 if (auto It = SlotRemap.find(Slot); It != SlotRemap.end()) {
923 LLVM_DEBUG(dbgs() << "Remapping debug info for ["
924 << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
925 VI.updateStackSlot(It->second);
926 FixedDbg++;
927 }
928 }
929
930 // Keep a list of *allocas* which need to be remapped.
931 DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
932
933 // Keep a list of allocas which has been affected by the remap.
934 SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
935
936 for (const std::pair<int, int> &SI : SlotRemap) {
937 const AllocaInst *From = MFI->getObjectAllocation(SI.first);
938 const AllocaInst *To = MFI->getObjectAllocation(SI.second);
939 assert(To && From && "Invalid allocation object");
940 Allocas[From] = To;
941
942 // If From is before wo, its possible that there is a use of From between
943 // them.
944 if (From->comesBefore(To))
945 const_cast<AllocaInst *>(To)->moveBefore(
946 const_cast<AllocaInst *>(From)->getIterator());
947
948 // AA might be used later for instruction scheduling, and we need it to be
949 // able to deduce the correct aliasing releationships between pointers
950 // derived from the alloca being remapped and the target of that remapping.
951 // The only safe way, without directly informing AA about the remapping
952 // somehow, is to directly update the IR to reflect the change being made
953 // here.
954 Instruction *Inst = const_cast<AllocaInst *>(To);
955 if (From->getType() != To->getType()) {
956 BitCastInst *Cast = new BitCastInst(Inst, From->getType());
957 Cast->insertAfter(Inst->getIterator());
958 Inst = Cast;
959 }
960
961 // We keep both slots to maintain AliasAnalysis metadata later.
962 MergedAllocas.insert(From);
963 MergedAllocas.insert(To);
964
965 // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
966 // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
967 // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
969 = MFI->getObjectSSPLayout(SI.first);
971 if (FromKind != MachineFrameInfo::SSPLK_None &&
972 (ToKind == MachineFrameInfo::SSPLK_None ||
974 FromKind != MachineFrameInfo::SSPLK_AddrOf)))
975 MFI->setObjectSSPLayout(SI.second, FromKind);
976
977 // The new alloca might not be valid in a llvm.dbg.declare for this
978 // variable, so poison out the use to make the verifier happy.
979 AllocaInst *FromAI = const_cast<AllocaInst *>(From);
980 if (FromAI->isUsedByMetadata())
982 for (auto &Use : FromAI->uses()) {
983 if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
984 if (BCI->isUsedByMetadata())
985 ValueAsMetadata::handleRAUW(BCI, PoisonValue::get(BCI->getType()));
986 }
987
988 // Note that this will not replace uses in MMOs (which we'll update below),
989 // or anywhere else (which is why we won't delete the original
990 // instruction).
991 FromAI->replaceAllUsesWith(Inst);
992 }
993
994 // Remap all instructions to the new stack slots.
995 std::vector<std::vector<MachineMemOperand *>> SSRefs(
996 MFI->getObjectIndexEnd());
997 for (MachineBasicBlock &BB : *MF)
998 for (MachineInstr &I : BB) {
999 // Skip lifetime markers. We'll remove them soon.
1000 if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1001 I.getOpcode() == TargetOpcode::LIFETIME_END)
1002 continue;
1003
1004 // Update the MachineMemOperand to use the new alloca.
1005 for (MachineMemOperand *MMO : I.memoperands()) {
1006 // We've replaced IR-level uses of the remapped allocas, so we only
1007 // need to replace direct uses here.
1008 const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
1009 if (!AI)
1010 continue;
1011
1012 auto It = Allocas.find(AI);
1013 if (It == Allocas.end())
1014 continue;
1015
1016 MMO->setValue(It->second);
1017 FixedMemOp++;
1018 }
1019
1020 // Update all of the machine instruction operands.
1021 for (MachineOperand &MO : I.operands()) {
1022 if (!MO.isFI())
1023 continue;
1024 int FromSlot = MO.getIndex();
1025
1026 // Don't touch arguments.
1027 if (FromSlot<0)
1028 continue;
1029
1030 // Only look at mapped slots.
1031 if (!SlotRemap.count(FromSlot))
1032 continue;
1033
1034 // In a debug build, check that the instruction that we are modifying is
1035 // inside the expected live range. If the instruction is not inside
1036 // the calculated range then it means that the alloca usage moved
1037 // outside of the lifetime markers, or that the user has a bug.
1038 // NOTE: Alloca address calculations which happen outside the lifetime
1039 // zone are okay, despite the fact that we don't have a good way
1040 // for validating all of the usages of the calculation.
1041#ifndef NDEBUG
1042 bool TouchesMemory = I.mayLoadOrStore();
1043 // If we *don't* protect the user from escaped allocas, don't bother
1044 // validating the instructions.
1045 if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1046 SlotIndex Index = Indexes->getInstructionIndex(I);
1047 const LiveInterval *Interval = &*Intervals[FromSlot];
1048 assert(Interval->find(Index) != Interval->end() &&
1049 "Found instruction usage outside of live range.");
1050 }
1051#endif
1052
1053 // Fix the machine instructions.
1054 int ToSlot = SlotRemap[FromSlot];
1055 MO.setIndex(ToSlot);
1056 FixedInstr++;
1057 }
1058
1059 // We adjust AliasAnalysis information for merged stack slots.
1061 bool ReplaceMemOps = false;
1062 for (MachineMemOperand *MMO : I.memoperands()) {
1063 // Collect MachineMemOperands which reference
1064 // FixedStackPseudoSourceValues with old frame indices.
1066 MMO->getPseudoValue())) {
1067 int FI = FSV->getFrameIndex();
1068 auto To = SlotRemap.find(FI);
1069 if (To != SlotRemap.end())
1070 SSRefs[FI].push_back(MMO);
1071 }
1072
1073 // If this memory location can be a slot remapped here,
1074 // we remove AA information.
1075 bool MayHaveConflictingAAMD = false;
1076 if (MMO->getAAInfo()) {
1077 if (const Value *MMOV = MMO->getValue()) {
1078 SmallVector<Value *, 4> Objs;
1080
1081 if (Objs.empty())
1082 MayHaveConflictingAAMD = true;
1083 else
1084 for (Value *V : Objs) {
1085 // If this memory location comes from a known stack slot
1086 // that is not remapped, we continue checking.
1087 // Otherwise, we need to invalidate AA infomation.
1088 const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1089 if (AI && MergedAllocas.count(AI)) {
1090 MayHaveConflictingAAMD = true;
1091 break;
1092 }
1093 }
1094 }
1095 }
1096 if (MayHaveConflictingAAMD) {
1097 NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
1098 ReplaceMemOps = true;
1099 } else {
1100 NewMMOs.push_back(MMO);
1101 }
1102 }
1103
1104 // If any memory operand is updated, set memory references of
1105 // this instruction.
1106 if (ReplaceMemOps)
1107 I.setMemRefs(*MF, NewMMOs);
1108 }
1109
1110 // Rewrite MachineMemOperands that reference old frame indices.
1111 for (auto E : enumerate(SSRefs))
1112 if (!E.value().empty()) {
1113 const PseudoSourceValue *NewSV =
1114 MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
1115 for (MachineMemOperand *Ref : E.value())
1116 Ref->setValue(NewSV);
1117 }
1118
1119 // Update the location of C++ catch objects for the MSVC personality routine.
1120 if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1121 for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1122 for (WinEHHandlerType &H : TBME.HandlerArray)
1123 if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max())
1124 if (auto It = SlotRemap.find(H.CatchObj.FrameIndex);
1125 It != SlotRemap.end())
1126 H.CatchObj.FrameIndex = It->second;
1127
1128 LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1129 LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1130 LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1131 (void) FixedMemOp;
1132 (void) FixedDbg;
1133 (void) FixedInstr;
1134}
1135
1136void StackColoring::removeInvalidSlotRanges() {
1137 for (MachineBasicBlock &BB : *MF)
1138 for (MachineInstr &I : BB) {
1139 if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1140 I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1141 continue;
1142
1143 // Some intervals are suspicious! In some cases we find address
1144 // calculations outside of the lifetime zone, but not actual memory
1145 // read or write. Memory accesses outside of the lifetime zone are a clear
1146 // violation, but address calculations are okay. This can happen when
1147 // GEPs are hoisted outside of the lifetime zone.
1148 // So, in here we only check instructions which can read or write memory.
1149 if (!I.mayLoad() && !I.mayStore())
1150 continue;
1151
1152 // Check all of the machine operands.
1153 for (const MachineOperand &MO : I.operands()) {
1154 if (!MO.isFI())
1155 continue;
1156
1157 int Slot = MO.getIndex();
1158
1159 if (Slot<0)
1160 continue;
1161
1162 if (Intervals[Slot]->empty())
1163 continue;
1164
1165 // Check that the used slot is inside the calculated lifetime range.
1166 // If it is not, warn about it and invalidate the range.
1167 LiveInterval *Interval = &*Intervals[Slot];
1168 SlotIndex Index = Indexes->getInstructionIndex(I);
1169 if (Interval->find(Index) == Interval->end()) {
1170 Interval->clear();
1171 LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1172 EscapedAllocas++;
1173 }
1174 }
1175 }
1176}
1177
1178void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1179 unsigned NumSlots) {
1180 // Expunge slot remap map.
1181 for (unsigned i=0; i < NumSlots; ++i) {
1182 // If we are remapping i
1183 if (auto It = SlotRemap.find(i); It != SlotRemap.end()) {
1184 int Target = It->second;
1185 // As long as our target is mapped to something else, follow it.
1186 while (true) {
1187 auto It = SlotRemap.find(Target);
1188 if (It == SlotRemap.end())
1189 break;
1190 Target = It->second;
1191 SlotRemap[i] = Target;
1192 }
1193 }
1194 }
1195}
1196
1197bool StackColoringLegacy::runOnMachineFunction(MachineFunction &MF) {
1198 StackColoring SC(&getAnalysis<SlotIndexesWrapperPass>().getSI());
1199 return SC.run(MF, skipFunction(MF.getFunction()));
1200}
1201
1204 StackColoring SC(&MFAM.getResult<SlotIndexesAnalysis>(MF));
1205 if (SC.run(MF))
1207 return PreservedAnalyses::all();
1208}
1209
1210bool StackColoring::run(MachineFunction &Func, bool OnlyRemoveMarkers) {
1211 LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1212 << "********** Function: " << Func.getName() << '\n');
1213 MF = &Func;
1214 MFI = &MF->getFrameInfo();
1215 BlockLiveness.clear();
1216 BasicBlocks.clear();
1217 BasicBlockNumbering.clear();
1218 Markers.clear();
1219 Intervals.clear();
1220 LiveStarts.clear();
1221 VNInfoAllocator.Reset();
1222
1223 unsigned NumSlots = MFI->getObjectIndexEnd();
1224
1225 // If there are no stack slots then there are no markers to remove.
1226 if (!NumSlots)
1227 return false;
1228
1229 SmallVector<int, 8> SortedSlots;
1230 SortedSlots.reserve(NumSlots);
1231 Intervals.reserve(NumSlots);
1232 LiveStarts.resize(NumSlots);
1233
1234 unsigned NumMarkers = collectMarkers(NumSlots);
1235
1236 unsigned TotalSize = 0;
1237 LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1238 << " slots\n");
1239 LLVM_DEBUG(dbgs() << "Slot structure:\n");
1240
1241 for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1242 LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1243 << " bytes.\n");
1244 TotalSize += MFI->getObjectSize(i);
1245 }
1246
1247 LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1248
1249 // Don't continue because there are not enough lifetime markers, or the
1250 // stack is too small, or we are told not to optimize the slots, or
1251 // opt-bisect-limit is skipping this pass.
1252 if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1253 OnlyRemoveMarkers) {
1254 LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1255 return removeAllMarkers();
1256 }
1257
1258 for (unsigned i=0; i < NumSlots; ++i) {
1259 std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1260 LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1261 Intervals.push_back(std::move(LI));
1262 SortedSlots.push_back(i);
1263 }
1264
1265 // Calculate the liveness of each block.
1266 calculateLocalLiveness();
1267 LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1268 LLVM_DEBUG(dump());
1269
1270 // Propagate the liveness information.
1271 calculateLiveIntervals(NumSlots);
1272 LLVM_DEBUG(dumpIntervals());
1273
1274 // Search for allocas which are used outside of the declared lifetime
1275 // markers.
1277 removeInvalidSlotRanges();
1278
1279 // Maps old slots to new slots.
1280 DenseMap<int, int> SlotRemap;
1281 unsigned RemovedSlots = 0;
1282 unsigned ReducedSize = 0;
1283
1284 // Do not bother looking at empty intervals.
1285 for (unsigned I = 0; I < NumSlots; ++I) {
1286 if (Intervals[SortedSlots[I]]->empty())
1287 SortedSlots[I] = -1;
1288 }
1289
1290 // This is a simple greedy algorithm for merging allocas. First, sort the
1291 // slots, placing the largest slots first. Next, perform an n^2 scan and look
1292 // for disjoint slots. When you find disjoint slots, merge the smaller one
1293 // into the bigger one and update the live interval. Remove the small alloca
1294 // and continue.
1295
1296 // Sort the slots according to their size. Place unused slots at the end.
1297 // Use stable sort to guarantee deterministic code generation.
1298 llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
1299 // We use -1 to denote a uninteresting slot. Place these slots at the end.
1300 if (LHS == -1)
1301 return false;
1302 if (RHS == -1)
1303 return true;
1304 // Sort according to size.
1305 return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1306 });
1307
1308 for (auto &s : LiveStarts)
1309 llvm::sort(s);
1310
1311 bool Changed = true;
1312 while (Changed) {
1313 Changed = false;
1314 for (unsigned I = 0; I < NumSlots; ++I) {
1315 if (SortedSlots[I] == -1)
1316 continue;
1317
1318 for (unsigned J=I+1; J < NumSlots; ++J) {
1319 if (SortedSlots[J] == -1)
1320 continue;
1321
1322 int FirstSlot = SortedSlots[I];
1323 int SecondSlot = SortedSlots[J];
1324
1325 // Objects with different stack IDs cannot be merged.
1326 if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
1327 continue;
1328
1329 LiveInterval *First = &*Intervals[FirstSlot];
1330 LiveInterval *Second = &*Intervals[SecondSlot];
1331 auto &FirstS = LiveStarts[FirstSlot];
1332 auto &SecondS = LiveStarts[SecondSlot];
1333 assert(!First->empty() && !Second->empty() && "Found an empty range");
1334
1335 // Merge disjoint slots. This is a little bit tricky - see the
1336 // Implementation Notes section for an explanation.
1337 if (!First->isLiveAtIndexes(SecondS) &&
1338 !Second->isLiveAtIndexes(FirstS)) {
1339 Changed = true;
1340 First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1341
1342 int OldSize = FirstS.size();
1343 FirstS.append(SecondS.begin(), SecondS.end());
1344 auto Mid = FirstS.begin() + OldSize;
1345 std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1346
1347 SlotRemap[SecondSlot] = FirstSlot;
1348 SortedSlots[J] = -1;
1349 LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1350 << SecondSlot << " together.\n");
1351 Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot),
1352 MFI->getObjectAlign(SecondSlot));
1353
1354 assert(MFI->getObjectSize(FirstSlot) >=
1355 MFI->getObjectSize(SecondSlot) &&
1356 "Merging a small object into a larger one");
1357
1358 RemovedSlots+=1;
1359 ReducedSize += MFI->getObjectSize(SecondSlot);
1360 MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1361 MFI->RemoveStackObject(SecondSlot);
1362 }
1363 }
1364 }
1365 }// While changed.
1366
1367 // Record statistics.
1368 StackSpaceSaved += ReducedSize;
1369 StackSlotMerged += RemovedSlots;
1370 LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1371 << ReducedSize << " bytes\n");
1372
1373 // Scan the entire function and update all machine operands that use frame
1374 // indices to use the remapped frame index.
1375 if (!SlotRemap.empty()) {
1376 expungeSlotMap(SlotRemap, NumSlots);
1377 remapInstructions(SlotRemap);
1378 }
1379
1380 return removeAllMarkers();
1381}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:661
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
This defines the Use class.
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< uint64_t, uint64_t > Interval
This file contains the declarations for metadata subclasses.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
R600 Emit Clause Markers
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static int getStartOrEndSlot(const MachineInstr &MI)
static cl::opt< bool > DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, cl::desc("Disable stack coloring"))
static cl::opt< bool > ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken"))
The user may write code that uses allocas outside of the declared lifetime zone.
static cl::opt< bool > LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", cl::init(true), cl::Hidden, cl::desc("Treat stack lifetimes as starting on first use, not on START marker."))
Enable enhanced dataflow scheme for lifetime analysis (treat first use of stack slot as start of slot...
Merge disjoint stack slots
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
Value * RHS
Value * LHS
PointerType * getType() const
Overload to return most specific pointer type.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
Definition BitVector.h:355
void clear()
Removes all bits from the bitvector.
Definition BitVector.h:349
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
size_type size() const
Returns the number of bits in this bitvector.
Definition BitVector.h:178
bool subsetOf(const BitVector &RHS) const
Check if This is a subset of RHS.
Definition BitVector.h:573
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool empty() const
Definition DenseMap.h:109
iterator end()
Definition DenseMap.h:81
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
LLVM_ABI bool isLiveAtIndexes(ArrayRef< SlotIndex > Slots) const
bool empty() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
SSPLayoutKind
Stack Smashing Protection (SSP) rules require that vulnerable stack allocations are located close the...
@ SSPLK_LargeArray
Array or nested array >= SSP-buffer-size.
@ SSPLK_AddrOf
The address of this allocation is exposed and triggered protection.
@ SSPLK_None
Did not trigger a stack protector.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
LLVM_ABI void print(raw_ostream &os) const
Print this index to the given raw_ostream.
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the index past the last valid index in the given basic block.
SlotIndex getInstructionIndex(const MachineInstr &MI, bool IgnoreBundle=false) const
Returns the base index for the given instruction.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
BumpPtrAllocator Allocator
static LLVM_ABI void handleRAUW(Value *From, Value *To)
Definition Metadata.cpp:552
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
bool isUsedByMetadata() const
Return true if there is metadata referencing this value.
Definition Value.h:568
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
self_iterator getIterator()
Definition ilist_node.h:123
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
bool empty() const
Definition BasicBlock.h:101
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2115
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
LLVM_ABI char & StackColoringLegacyID
StackSlotColoring - This pass performs stack coloring and merging.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
iterator_range< df_iterator< T > > depth_first(const T &G)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
SmallVector< WinEHHandlerType, 1 > HandlerArray