LLVM 23.0.0git
StackColoring.cpp
Go to the documentation of this file.
1//===- StackColoring.cpp --------------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass implements the stack-coloring optimization that looks for
10// lifetime markers machine instructions (LIFETIME_START and LIFETIME_END),
11// which represent the possible lifetime of stack slots. It attempts to
12// merge disjoint stack slots and reduce the used stack space.
13// NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
14//
15// TODO: In the future we plan to improve stack coloring in the following ways:
16// 1. Allow merging multiple small slots into a single larger slot at different
17// offsets.
18// 2. Merge this pass with StackSlotColoring and allow merging of allocas with
19// spill slots.
20//
21//===----------------------------------------------------------------------===//
22
24#include "llvm/ADT/BitVector.h"
25#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/Statistic.h"
39#include "llvm/CodeGen/Passes.h"
44#include "llvm/Config/llvm-config.h"
45#include "llvm/IR/Constants.h"
48#include "llvm/IR/Metadata.h"
49#include "llvm/IR/Use.h"
50#include "llvm/IR/Value.h"
52#include "llvm/Pass.h"
56#include "llvm/Support/Debug.h"
58#include <algorithm>
59#include <cassert>
60#include <limits>
61#include <memory>
62#include <utility>
63
64using namespace llvm;
65
66#define DEBUG_TYPE "stack-coloring"
67
68static cl::opt<bool>
69DisableColoring("no-stack-coloring",
70 cl::init(false), cl::Hidden,
71 cl::desc("Disable stack coloring"));
72
73/// The user may write code that uses allocas outside of the declared lifetime
74/// zone. This can happen when the user returns a reference to a local
75/// data-structure. We can detect these cases and decide not to optimize the
76/// code. If this flag is enabled, we try to save the user. This option
77/// is treated as overriding LifetimeStartOnFirstUse below.
78static cl::opt<bool>
79ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80 cl::init(false), cl::Hidden,
81 cl::desc("Do not optimize lifetime zones that "
82 "are broken"));
83
84/// Enable enhanced dataflow scheme for lifetime analysis (treat first
85/// use of stack slot as start of slot lifetime, as opposed to looking
86/// for LIFETIME_START marker). See "Implementation notes" below for
87/// more info.
88static cl::opt<bool>
89LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90 cl::init(true), cl::Hidden,
91 cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
92
93
94STATISTIC(NumMarkerSeen, "Number of lifetime markers found.");
95STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots.");
96STATISTIC(StackSlotMerged, "Number of stack slot merged.");
97STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region");
98
99//===----------------------------------------------------------------------===//
100// StackColoring Pass
101//===----------------------------------------------------------------------===//
102//
103// Stack Coloring reduces stack usage by merging stack slots when they
104// can't be used together. For example, consider the following C program:
105//
106// void bar(char *, int);
107// void foo(bool var) {
108// A: {
109// char z[4096];
110// bar(z, 0);
111// }
112//
113// char *p;
114// char x[4096];
115// char y[4096];
116// if (var) {
117// p = x;
118// } else {
119// bar(y, 1);
120// p = y + 1024;
121// }
122// B:
123// bar(p, 2);
124// }
125//
126// Naively-compiled, this program would use 12k of stack space. However, the
127// stack slot corresponding to `z` is always destroyed before either of the
128// stack slots for `x` or `y` are used, and then `x` is only used if `var`
129// is true, while `y` is only used if `var` is false. So in no time are 2
130// of the stack slots used together, and therefore we can merge them,
131// compiling the function using only a single 4k alloca:
132//
133// void foo(bool var) { // equivalent
134// char x[4096];
135// char *p;
136// bar(x, 0);
137// if (var) {
138// p = x;
139// } else {
140// bar(x, 1);
141// p = x + 1024;
142// }
143// bar(p, 2);
144// }
145//
146// This is an important optimization if we want stack space to be under
147// control in large functions, both open-coded ones and ones created by
148// inlining.
149//
150// Implementation Notes:
151// ---------------------
152//
153// An important part of the above reasoning is that `z` can't be accessed
154// while the latter 2 calls to `bar` are running. This is justified because
155// `z`'s lifetime is over after we exit from block `A:`, so any further
156// accesses to it would be UB. The way we represent this information
157// in LLVM is by having frontends delimit blocks with `lifetime.start`
158// and `lifetime.end` intrinsics.
159//
160// The effect of these intrinsics seems to be as follows (maybe I should
161// specify this in the reference?):
162//
163// L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164// lifetime intrinsic refers to that stack slot, in which case
165// it is marked as *in-scope*.
166// L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167// the stack slot is overwritten with `undef`.
168// L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169// L4) on function exit, all stack slots are marked as *out-of-scope*.
170// L5) `lifetime.end` is a no-op when called on a slot that is already
171// *out-of-scope*.
172// L6) memory accesses to *out-of-scope* stack slots are UB.
173// L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174// are invalidated, unless the slot is "degenerate". This is used to
175// justify not marking slots as in-use until the pointer to them is
176// used, but feels a bit hacky in the presence of things like LICM. See
177// the "Degenerate Slots" section for more details.
178//
179// Now, let's ground stack coloring on these rules. We'll define a slot
180// as *in-use* at a (dynamic) point in execution if it either can be
181// written to at that point, or if it has a live and non-undef content
182// at that point.
183//
184// Obviously, slots that are never *in-use* together can be merged, and
185// in our example `foo`, the slots for `x`, `y` and `z` are never
186// in-use together (of course, sometimes slots that *are* in-use together
187// might still be mergable, but we don't care about that here).
188//
189// In this implementation, we successively merge pairs of slots that are
190// not *in-use* together. We could be smarter - for example, we could merge
191// a single large slot with 2 small slots, or we could construct the
192// interference graph and run a "smart" graph coloring algorithm, but with
193// that aside, how do we find out whether a pair of slots might be *in-use*
194// together?
195//
196// From our rules, we see that *out-of-scope* slots are never *in-use*,
197// and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198// until their address is taken. Therefore, we can approximate slot activity
199// using dataflow.
200//
201// A subtle point: naively, we might try to figure out which pairs of
202// stack-slots interfere by propagating `S in-use` through the CFG for every
203// stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204// which they are both *in-use*.
205//
206// That is sound, but overly conservative in some cases: in our (artificial)
207// example `foo`, either `x` or `y` might be in use at the label `B:`, but
208// as `x` is only in use if we came in from the `var` edge and `y` only
209// if we came from the `!var` edge, they still can't be in use together.
210// See PR32488 for an important real-life case.
211//
212// If we wanted to find all points of interference precisely, we could
213// propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214// would be precise, but requires propagating `O(n^2)` dataflow facts.
215//
216// However, we aren't interested in the *set* of points of interference
217// between 2 stack slots, only *whether* there *is* such a point. So we
218// can rely on a little trick: for `S` and `T` to be in-use together,
219// one of them needs to become in-use while the other is in-use (or
220// they might both become in use simultaneously). We can check this
221// by also keeping track of the points at which a stack slot might *start*
222// being in-use.
223//
224// Exact first use:
225// ----------------
226//
227// Consider the following motivating example:
228//
229// int foo() {
230// char b1[1024], b2[1024];
231// if (...) {
232// char b3[1024];
233// <uses of b1, b3>;
234// return x;
235// } else {
236// char b4[1024], b5[1024];
237// <uses of b2, b4, b5>;
238// return y;
239// }
240// }
241//
242// In the code above, "b3" and "b4" are declared in distinct lexical
243// scopes, meaning that it is easy to prove that they can share the
244// same stack slot. Variables "b1" and "b2" are declared in the same
245// scope, meaning that from a lexical point of view, their lifetimes
246// overlap. From a control flow pointer of view, however, the two
247// variables are accessed in disjoint regions of the CFG, thus it
248// should be possible for them to share the same stack slot. An ideal
249// stack allocation for the function above would look like:
250//
251// slot 0: b1, b2
252// slot 1: b3, b4
253// slot 2: b5
254//
255// Achieving this allocation is tricky, however, due to the way
256// lifetime markers are inserted. Here is a simplified view of the
257// control flow graph for the code above:
258//
259// +------ block 0 -------+
260// 0| LIFETIME_START b1, b2 |
261// 1| <test 'if' condition> |
262// +-----------------------+
263// ./ \.
264// +------ block 1 -------+ +------ block 2 -------+
265// 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
266// 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
267// 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
268// +-----------------------+ +-----------------------+
269// \. /.
270// +------ block 3 -------+
271// 8| <cleanupcode> |
272// 9| LIFETIME_END b1, b2 |
273// 10| return |
274// +-----------------------+
275//
276// If we create live intervals for the variables above strictly based
277// on the lifetime markers, we'll get the set of intervals on the
278// left. If we ignore the lifetime start markers and instead treat a
279// variable's lifetime as beginning with the first reference to the
280// var, then we get the intervals on the right.
281//
282// LIFETIME_START First Use
283// b1: [0,9] [3,4] [8,9]
284// b2: [0,9] [6,9]
285// b3: [2,4] [3,4]
286// b4: [5,7] [6,7]
287// b5: [5,7] [6,7]
288//
289// For the intervals on the left, the best we can do is overlap two
290// variables (b3 and b4, for example); this gives us a stack size of
291// 4*1024 bytes, not ideal. When treating first-use as the start of a
292// lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293// byte stack (better).
294//
295// Degenerate Slots:
296// -----------------
297//
298// Relying entirely on first-use of stack slots is problematic,
299// however, due to the fact that optimizations can sometimes migrate
300// uses of a variable outside of its lifetime start/end region. Here
301// is an example:
302//
303// int bar() {
304// char b1[1024], b2[1024];
305// if (...) {
306// <uses of b2>
307// return y;
308// } else {
309// <uses of b1>
310// while (...) {
311// char b3[1024];
312// <uses of b3>
313// }
314// }
315// }
316//
317// Before optimization, the control flow graph for the code above
318// might look like the following:
319//
320// +------ block 0 -------+
321// 0| LIFETIME_START b1, b2 |
322// 1| <test 'if' condition> |
323// +-----------------------+
324// ./ \.
325// +------ block 1 -------+ +------- block 2 -------+
326// 2| <uses of b2> | 3| <uses of b1> |
327// +-----------------------+ +-----------------------+
328// | |
329// | +------- block 3 -------+ <-\.
330// | 4| <while condition> | |
331// | +-----------------------+ |
332// | / | |
333// | / +------- block 4 -------+
334// \ / 5| LIFETIME_START b3 | |
335// \ / 6| <uses of b3> | |
336// \ / 7| LIFETIME_END b3 | |
337// \ | +------------------------+ |
338// \ | \ /
339// +------ block 5 -----+ \---------------
340// 8| <cleanupcode> |
341// 9| LIFETIME_END b1, b2 |
342// 10| return |
343// +---------------------+
344//
345// During optimization, however, it can happen that an instruction
346// computing an address in "b3" (for example, a loop-invariant GEP) is
347// hoisted up out of the loop from block 4 to block 2. [Note that
348// this is not an actual load from the stack, only an instruction that
349// computes the address to be loaded]. If this happens, there is now a
350// path leading from the first use of b3 to the return instruction
351// that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352// now larger than if we were computing live intervals strictly based
353// on lifetime markers. In the example above, this lengthened lifetime
354// would mean that it would appear illegal to overlap b3 with b2.
355//
356// To deal with this such cases, the code in ::collectMarkers() below
357// tries to identify "degenerate" slots -- those slots where on a single
358// forward pass through the CFG we encounter a first reference to slot
359// K before we hit the slot K lifetime start marker. For such slots,
360// we fall back on using the lifetime start marker as the beginning of
361// the variable's lifetime. NB: with this implementation, slots can
362// appear degenerate in cases where there is unstructured control flow:
363//
364// if (q) goto mid;
365// if (x > 9) {
366// int b[100];
367// memcpy(&b[0], ...);
368// mid: b[k] = ...;
369// abc(&b);
370// }
371//
372// If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
373// before visiting the memcpy block (which will contain the lifetime start
374// for "b" then it will appear that 'b' has a degenerate lifetime.
375
376namespace {
377
378/// StackColoring - A machine pass for merging disjoint stack allocations,
379/// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
380class StackColoring {
381 MachineFrameInfo *MFI = nullptr;
382 MachineFunction *MF = nullptr;
383
384 /// A class representing liveness information for a single basic block.
385 /// Each bit in the BitVector represents the liveness property
386 /// for a different stack slot.
387 struct BlockLifetimeInfo {
388 /// Which slots BEGINs in each basic block.
389 BitVector Begin;
390
391 /// Which slots ENDs in each basic block.
392 BitVector End;
393
394 /// Which slots are marked as LIVE_IN, coming into each basic block.
395 BitVector LiveIn;
396
397 /// Which slots are marked as LIVE_OUT, coming out of each basic block.
398 BitVector LiveOut;
399 };
400
401 /// Maps active slots (per bit) for each basic block.
402 using LivenessMap = DenseMap<const MachineBasicBlock *, BlockLifetimeInfo>;
403 LivenessMap BlockLiveness;
404
405 /// Depth-first ordering of the basic blocks.
407
408 /// Maps slots to their use interval. Outside of this interval, slots
409 /// values are either dead or `undef` and they will not be written to.
411
412 /// Maps slots to the points where they can become in-use.
414
415 /// VNInfo is used for the construction of LiveIntervals.
416 VNInfo::Allocator VNInfoAllocator;
417
418 /// SlotIndex analysis object.
419 SlotIndexes *Indexes = nullptr;
420
421 /// The list of lifetime markers found. These markers are to be removed
422 /// once the coloring is done.
423 SmallVector<MachineInstr*, 8> Markers;
424
425 /// Record the FI slots for which we have seen some sort of
426 /// lifetime marker (either start or end).
427 BitVector InterestingSlots;
428
429 /// FI slots that need to be handled conservatively (for these
430 /// slots lifetime-start-on-first-use is disabled).
431 BitVector ConservativeSlots;
432
433 /// Number of iterations taken during data flow analysis.
434 unsigned NumIterations;
435
436public:
437 StackColoring(SlotIndexes *Indexes) : Indexes(Indexes) {}
438 bool run(MachineFunction &Func, bool OnlyRemoveMarkers = false);
439
440private:
441 /// Used in collectMarkers
442 using BlockBitVecMap = DenseMap<const MachineBasicBlock *, BitVector>;
443
444 /// Debug.
445 void dump() const;
446 void dumpIntervals() const;
447 void dumpBB(MachineBasicBlock *MBB) const;
448 void dumpBV(const char *tag, const BitVector &BV) const;
449
450 /// Removes all of the lifetime marker instructions from the function.
451 /// \returns true if any markers were removed.
452 bool removeAllMarkers();
453
454 /// Scan the machine function and find all of the lifetime markers.
455 /// Record the findings in the BEGIN and END vectors.
456 /// \returns the number of markers found.
457 unsigned collectMarkers(unsigned NumSlot);
458
459 /// Perform the dataflow calculation and calculate the lifetime for each of
460 /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
461 /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
462 /// in and out blocks.
463 void calculateLocalLiveness();
464
465 /// Returns TRUE if we're using the first-use-begins-lifetime method for
466 /// this slot (if FALSE, then the start marker is treated as start of lifetime).
467 bool applyFirstUse(int Slot) {
469 return false;
470 if (ConservativeSlots.test(Slot))
471 return false;
472 return true;
473 }
474
475 /// Examines the specified instruction and returns TRUE if the instruction
476 /// represents the start or end of an interesting lifetime. The slot or slots
477 /// starting or ending are added to the vector "slots" and "isStart" is set
478 /// accordingly.
479 /// \returns True if inst contains a lifetime start or end
480 bool isLifetimeStartOrEnd(const MachineInstr &MI,
481 SmallVector<int, 4> &slots,
482 bool &isStart);
483
484 /// Construct the LiveIntervals for the slots.
485 void calculateLiveIntervals(unsigned NumSlots);
486
487 /// Go over the machine function and change instructions which use stack
488 /// slots to use the joint slots.
489 void remapInstructions(DenseMap<int, int> &SlotRemap);
490
491 /// The input program may contain instructions which are not inside lifetime
492 /// markers. This can happen due to a bug in the compiler or due to a bug in
493 /// user code (for example, returning a reference to a local variable).
494 /// This procedure checks all of the instructions in the function and
495 /// invalidates lifetime ranges which do not contain all of the instructions
496 /// which access that frame slot.
497 void removeInvalidSlotRanges();
498
499 /// Map entries which point to other entries to their destination.
500 /// A->B->C becomes A->C.
501 void expungeSlotMap(DenseMap<int, int> &SlotRemap, unsigned NumSlots);
502};
503
504class StackColoringLegacy : public MachineFunctionPass {
505public:
506 static char ID;
507
508 StackColoringLegacy() : MachineFunctionPass(ID) {}
509
510 void getAnalysisUsage(AnalysisUsage &AU) const override;
511 bool runOnMachineFunction(MachineFunction &Func) override;
512};
513
514} // end anonymous namespace
515
516char StackColoringLegacy::ID = 0;
517
518char &llvm::StackColoringLegacyID = StackColoringLegacy::ID;
519
521 "Merge disjoint stack slots", false, false)
523INITIALIZE_PASS_END(StackColoringLegacy, DEBUG_TYPE,
524 "Merge disjoint stack slots", false, false)
525
526void StackColoringLegacy::getAnalysisUsage(AnalysisUsage &AU) const {
527 AU.addRequired<SlotIndexesWrapperPass>();
529}
530
531#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
532LLVM_DUMP_METHOD void StackColoring::dumpBV(const char *tag,
533 const BitVector &BV) const {
534 dbgs() << tag << " : { ";
535 for (unsigned I = 0, E = BV.size(); I != E; ++I)
536 dbgs() << BV.test(I) << " ";
537 dbgs() << "}\n";
538}
539
540LLVM_DUMP_METHOD void StackColoring::dumpBB(MachineBasicBlock *MBB) const {
541 LivenessMap::const_iterator BI = BlockLiveness.find(MBB);
542 assert(BI != BlockLiveness.end() && "Block not found");
543 const BlockLifetimeInfo &BlockInfo = BI->second;
544
545 dumpBV("BEGIN", BlockInfo.Begin);
546 dumpBV("END", BlockInfo.End);
547 dumpBV("LIVE_IN", BlockInfo.LiveIn);
548 dumpBV("LIVE_OUT", BlockInfo.LiveOut);
549}
550
551LLVM_DUMP_METHOD void StackColoring::dump() const {
552 for (MachineBasicBlock *MBB : depth_first(MF)) {
553 dbgs() << "Inspecting block #" << MBB->getNumber() << " ["
554 << MBB->getName() << "]\n";
555 dumpBB(MBB);
556 }
557}
558
559LLVM_DUMP_METHOD void StackColoring::dumpIntervals() const {
560 for (unsigned I = 0, E = Intervals.size(); I != E; ++I) {
561 dbgs() << "Interval[" << I << "]:\n";
562 Intervals[I]->dump();
563 }
564}
565#endif
566
567static inline int getStartOrEndSlot(const MachineInstr &MI)
568{
569 assert((MI.getOpcode() == TargetOpcode::LIFETIME_START ||
570 MI.getOpcode() == TargetOpcode::LIFETIME_END) &&
571 "Expected LIFETIME_START or LIFETIME_END op");
572 const MachineOperand &MO = MI.getOperand(0);
573 int Slot = MO.getIndex();
574 if (Slot >= 0)
575 return Slot;
576 return -1;
577}
578
579// At the moment the only way to end a variable lifetime is with
580// a VARIABLE_LIFETIME op (which can't contain a start). If things
581// change and the IR allows for a single inst that both begins
582// and ends lifetime(s), this interface will need to be reworked.
583bool StackColoring::isLifetimeStartOrEnd(const MachineInstr &MI,
584 SmallVector<int, 4> &slots,
585 bool &isStart) {
586 if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
587 MI.getOpcode() == TargetOpcode::LIFETIME_END) {
589 if (Slot < 0)
590 return false;
591 if (!InterestingSlots.test(Slot))
592 return false;
593 slots.push_back(Slot);
594 if (MI.getOpcode() == TargetOpcode::LIFETIME_END) {
595 isStart = false;
596 return true;
597 }
598 if (!applyFirstUse(Slot)) {
599 isStart = true;
600 return true;
601 }
603 if (!MI.isDebugInstr()) {
604 bool found = false;
605 for (const MachineOperand &MO : MI.operands()) {
606 if (!MO.isFI())
607 continue;
608 int Slot = MO.getIndex();
609 if (Slot<0)
610 continue;
611 if (InterestingSlots.test(Slot) && applyFirstUse(Slot)) {
612 slots.push_back(Slot);
613 found = true;
614 }
615 }
616 if (found) {
617 isStart = true;
618 return true;
619 }
620 }
621 }
622 return false;
623}
624
625unsigned StackColoring::collectMarkers(unsigned NumSlot) {
626 unsigned MarkersFound = 0;
627 BlockBitVecMap SeenStartMap;
628 InterestingSlots.clear();
629 InterestingSlots.resize(NumSlot);
630 ConservativeSlots.clear();
631 ConservativeSlots.resize(NumSlot);
632
633 // number of start and end lifetime ops for each slot
634 SmallVector<int, 8> NumStartLifetimes(NumSlot, 0);
635 SmallVector<int, 8> NumEndLifetimes(NumSlot, 0);
636
637 // Step 1: collect markers and populate the "InterestingSlots"
638 // and "ConservativeSlots" sets.
639 for (MachineBasicBlock *MBB : depth_first(MF)) {
640 BasicBlockOrdering.push_back(MBB);
641
642 // Compute the set of slots for which we've seen a START marker but have
643 // not yet seen an END marker at this point in the walk (e.g. on entry
644 // to this bb).
645 BitVector BetweenStartEnd;
646 BetweenStartEnd.resize(NumSlot);
647 for (const MachineBasicBlock *Pred : MBB->predecessors()) {
648 BlockBitVecMap::const_iterator I = SeenStartMap.find(Pred);
649 if (I != SeenStartMap.end()) {
650 BetweenStartEnd |= I->second;
651 }
652 }
653
654 // Walk the instructions in the block to look for start/end ops.
655 for (MachineInstr &MI : *MBB) {
656 if (MI.isDebugInstr())
657 continue;
658 if (MI.getOpcode() == TargetOpcode::LIFETIME_START ||
659 MI.getOpcode() == TargetOpcode::LIFETIME_END) {
661 if (Slot < 0)
662 continue;
663 InterestingSlots.set(Slot);
664 if (MI.getOpcode() == TargetOpcode::LIFETIME_START) {
665 BetweenStartEnd.set(Slot);
666 NumStartLifetimes[Slot] += 1;
667 } else {
668 BetweenStartEnd.reset(Slot);
669 NumEndLifetimes[Slot] += 1;
670 }
671 const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
672 if (Allocation) {
673 LLVM_DEBUG(dbgs() << "Found a lifetime ");
674 LLVM_DEBUG(dbgs() << (MI.getOpcode() == TargetOpcode::LIFETIME_START
675 ? "start"
676 : "end"));
677 LLVM_DEBUG(dbgs() << " marker for slot #" << Slot);
679 << " with allocation: " << Allocation->getName() << "\n");
680 }
681 Markers.push_back(&MI);
682 MarkersFound += 1;
683 } else {
684 for (const MachineOperand &MO : MI.operands()) {
685 if (!MO.isFI())
686 continue;
687 int Slot = MO.getIndex();
688 if (Slot < 0)
689 continue;
690 if (! BetweenStartEnd.test(Slot)) {
691 ConservativeSlots.set(Slot);
692 }
693 }
694 }
695 }
696 BitVector &SeenStart = SeenStartMap[MBB];
697 SeenStart |= BetweenStartEnd;
698 }
699 if (!MarkersFound) {
700 return 0;
701 }
702
703 // PR27903: slots with multiple start or end lifetime ops are not
704 // safe to enable for "lifetime-start-on-first-use".
705 for (unsigned slot = 0; slot < NumSlot; ++slot) {
706 if (NumStartLifetimes[slot] > 1 || NumEndLifetimes[slot] > 1)
707 ConservativeSlots.set(slot);
708 }
709
710 // The write to the catch object by the personality function is not propely
711 // modeled in IR: It happens before any cleanuppads are executed, even if the
712 // first mention of the catch object is in a catchpad. As such, mark catch
713 // object slots as conservative, so they are excluded from first-use analysis.
714 if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
715 for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
716 for (WinEHHandlerType &H : TBME.HandlerArray)
717 if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max() &&
718 H.CatchObj.FrameIndex >= 0)
719 ConservativeSlots.set(H.CatchObj.FrameIndex);
720
721 // Treat all stack slots as conservative if we happen to have calls to
722 // setjmp/sigsetjmp, as longjmp may re-enter the function on a different path.
723 if (MF->exposesReturnsTwice())
724 ConservativeSlots.set();
725
726 LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots));
727
728 // Step 2: compute begin/end sets for each block
729 for (const MachineBasicBlock *MBB : BasicBlockOrdering) {
730 // Keep a reference to avoid repeated lookups.
731 BlockLifetimeInfo &BlockInfo = BlockLiveness[MBB];
732
733 BlockInfo.Begin.resize(NumSlot);
734 BlockInfo.End.resize(NumSlot);
735
736 SmallVector<int, 4> slots;
737 for (const MachineInstr &MI : *MBB) {
738 bool isStart = false;
739 slots.clear();
740 if (isLifetimeStartOrEnd(MI, slots, isStart)) {
741 if (!isStart) {
742 assert(slots.size() == 1 && "unexpected: MI ends multiple slots");
743 int Slot = slots[0];
744 if (BlockInfo.Begin.test(Slot)) {
745 BlockInfo.Begin.reset(Slot);
746 }
747 BlockInfo.End.set(Slot);
748 } else {
749 for (auto Slot : slots) {
750 LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot);
752 << " at " << printMBBReference(*MBB) << " index ");
754 const AllocaInst *Allocation = MFI->getObjectAllocation(Slot);
755 if (Allocation) {
757 << " with allocation: " << Allocation->getName());
758 }
759 LLVM_DEBUG(dbgs() << "\n");
760 if (BlockInfo.End.test(Slot)) {
761 BlockInfo.End.reset(Slot);
762 }
763 BlockInfo.Begin.set(Slot);
764 }
765 }
766 }
767 }
768 }
769
770 // Update statistics.
771 NumMarkerSeen += MarkersFound;
772 return MarkersFound;
773}
774
775void StackColoring::calculateLocalLiveness() {
776 unsigned NumIters = 0;
777 bool changed = true;
778 // Create BitVector outside the loop and reuse them to avoid repeated heap
779 // allocations.
780 BitVector LocalLiveIn;
781 BitVector LocalLiveOut;
782 while (changed) {
783 changed = false;
784 ++NumIters;
785
786 for (const MachineBasicBlock *BB : BasicBlockOrdering) {
787 // Use an iterator to avoid repeated lookups.
788 LivenessMap::iterator BI = BlockLiveness.find(BB);
789 assert(BI != BlockLiveness.end() && "Block not found");
790 BlockLifetimeInfo &BlockInfo = BI->second;
791
792 // Compute LiveIn by unioning together the LiveOut sets of all preds.
793 LocalLiveIn.clear();
794 for (MachineBasicBlock *Pred : BB->predecessors()) {
795 LivenessMap::const_iterator I = BlockLiveness.find(Pred);
796 // PR37130: transformations prior to stack coloring can
797 // sometimes leave behind statically unreachable blocks; these
798 // can be safely skipped here.
799 if (I != BlockLiveness.end())
800 LocalLiveIn |= I->second.LiveOut;
801 }
802
803 // Compute LiveOut by subtracting out lifetimes that end in this
804 // block, then adding in lifetimes that begin in this block. If
805 // we have both BEGIN and END markers in the same basic block
806 // then we know that the BEGIN marker comes after the END,
807 // because we already handle the case where the BEGIN comes
808 // before the END when collecting the markers (and building the
809 // BEGIN/END vectors).
810 LocalLiveOut = LocalLiveIn;
811 LocalLiveOut.reset(BlockInfo.End);
812 LocalLiveOut |= BlockInfo.Begin;
813
814 // Update block LiveIn set, noting whether it has changed.
815 if (!LocalLiveIn.subsetOf(BlockInfo.LiveIn)) {
816 changed = true;
817 BlockInfo.LiveIn |= LocalLiveIn;
818 }
819
820 // Update block LiveOut set, noting whether it has changed.
821 if (!LocalLiveOut.subsetOf(BlockInfo.LiveOut)) {
822 changed = true;
823 BlockInfo.LiveOut |= LocalLiveOut;
824 }
825 }
826 } // while changed.
827
828 NumIterations = NumIters;
829}
830
831void StackColoring::calculateLiveIntervals(unsigned NumSlots) {
833 SmallVector<bool, 16> DefinitelyInUse;
834
835 // For each block, find which slots are active within this block
836 // and update the live intervals.
837 for (const MachineBasicBlock &MBB : *MF) {
838 Starts.clear();
839 Starts.resize(NumSlots);
840 DefinitelyInUse.clear();
841 DefinitelyInUse.resize(NumSlots);
842
843 // Start the interval of the slots that we previously found to be 'in-use'.
844 BlockLifetimeInfo &MBBLiveness = BlockLiveness[&MBB];
845 for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1;
846 pos = MBBLiveness.LiveIn.find_next(pos)) {
847 Starts[pos] = Indexes->getMBBStartIdx(&MBB);
848 }
849
850 // Create the interval for the basic blocks containing lifetime begin/end.
851 for (const MachineInstr &MI : MBB) {
852 SmallVector<int, 4> slots;
853 bool IsStart = false;
854 if (!isLifetimeStartOrEnd(MI, slots, IsStart))
855 continue;
856 SlotIndex ThisIndex = Indexes->getInstructionIndex(MI);
857 for (auto Slot : slots) {
858 if (IsStart) {
859 // If a slot is already definitely in use, we don't have to emit
860 // a new start marker because there is already a pre-existing
861 // one.
862 if (!DefinitelyInUse[Slot]) {
863 LiveStarts[Slot].push_back(ThisIndex);
864 DefinitelyInUse[Slot] = true;
865 }
866 if (!Starts[Slot].isValid())
867 Starts[Slot] = ThisIndex;
868 } else {
869 if (Starts[Slot].isValid()) {
870 VNInfo *VNI = Intervals[Slot]->getValNumInfo(0);
871 Intervals[Slot]->addSegment(
872 LiveInterval::Segment(Starts[Slot], ThisIndex, VNI));
873 Starts[Slot] = SlotIndex(); // Invalidate the start index
874 DefinitelyInUse[Slot] = false;
875 }
876 }
877 }
878 }
879
880 // Finish up started segments
881 for (unsigned i = 0; i < NumSlots; ++i) {
882 if (!Starts[i].isValid())
883 continue;
884
885 SlotIndex EndIdx = Indexes->getMBBEndIdx(&MBB);
886 VNInfo *VNI = Intervals[i]->getValNumInfo(0);
887 Intervals[i]->addSegment(LiveInterval::Segment(Starts[i], EndIdx, VNI));
888 }
889 }
890}
891
892bool StackColoring::removeAllMarkers() {
893 unsigned Count = 0;
894 for (MachineInstr *MI : Markers) {
895 MI->eraseFromParent();
896 Count++;
897 }
898 Markers.clear();
899
900 LLVM_DEBUG(dbgs() << "Removed " << Count << " markers.\n");
901 return Count;
902}
903
904void StackColoring::remapInstructions(DenseMap<int, int> &SlotRemap) {
905 unsigned FixedInstr = 0;
906 unsigned FixedMemOp = 0;
907 unsigned FixedDbg = 0;
908
909 // Remap debug information that refers to stack slots.
910 for (auto &VI : MF->getVariableDbgInfo()) {
911 if (!VI.Var || !VI.inStackSlot())
912 continue;
913 int Slot = VI.getStackSlot();
914 if (auto It = SlotRemap.find(Slot); It != SlotRemap.end()) {
915 LLVM_DEBUG(dbgs() << "Remapping debug info for ["
916 << cast<DILocalVariable>(VI.Var)->getName() << "].\n");
917 VI.updateStackSlot(It->second);
918 FixedDbg++;
919 }
920 }
921
922 // Keep a list of *allocas* which need to be remapped.
923 DenseMap<const AllocaInst*, const AllocaInst*> Allocas;
924
925 // Keep a list of allocas which has been affected by the remap.
926 SmallPtrSet<const AllocaInst*, 32> MergedAllocas;
927
928 for (const std::pair<int, int> &SI : SlotRemap) {
929 const AllocaInst *From = MFI->getObjectAllocation(SI.first);
930 const AllocaInst *To = MFI->getObjectAllocation(SI.second);
931 assert(To && From && "Invalid allocation object");
932 Allocas[From] = To;
933
934 // If From is before wo, its possible that there is a use of From between
935 // them.
936 if (From->comesBefore(To))
937 const_cast<AllocaInst *>(To)->moveBefore(
938 const_cast<AllocaInst *>(From)->getIterator());
939
940 // AA might be used later for instruction scheduling, and we need it to be
941 // able to deduce the correct aliasing releationships between pointers
942 // derived from the alloca being remapped and the target of that remapping.
943 // The only safe way, without directly informing AA about the remapping
944 // somehow, is to directly update the IR to reflect the change being made
945 // here.
946 Instruction *Inst = const_cast<AllocaInst *>(To);
947 if (From->getType() != To->getType()) {
948 BitCastInst *Cast = new BitCastInst(Inst, From->getType());
949 Cast->insertAfter(Inst->getIterator());
950 Inst = Cast;
951 }
952
953 // We keep both slots to maintain AliasAnalysis metadata later.
954 MergedAllocas.insert(From);
955 MergedAllocas.insert(To);
956
957 // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
958 // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
959 // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
961 = MFI->getObjectSSPLayout(SI.first);
963 if (FromKind != MachineFrameInfo::SSPLK_None &&
964 (ToKind == MachineFrameInfo::SSPLK_None ||
966 FromKind != MachineFrameInfo::SSPLK_AddrOf)))
967 MFI->setObjectSSPLayout(SI.second, FromKind);
968
969 // The new alloca might not be valid in a llvm.dbg.declare for this
970 // variable, so poison out the use to make the verifier happy.
971 AllocaInst *FromAI = const_cast<AllocaInst *>(From);
972 if (FromAI->isUsedByMetadata())
974 for (auto &Use : FromAI->uses()) {
975 if (BitCastInst *BCI = dyn_cast<BitCastInst>(Use.get()))
976 if (BCI->isUsedByMetadata())
977 ValueAsMetadata::handleRAUW(BCI, PoisonValue::get(BCI->getType()));
978 }
979
980 // Note that this will not replace uses in MMOs (which we'll update below),
981 // or anywhere else (which is why we won't delete the original
982 // instruction).
983 FromAI->replaceAllUsesWith(Inst);
984 }
985
986 // Remap all instructions to the new stack slots.
987 std::vector<std::vector<MachineMemOperand *>> SSRefs(
988 MFI->getObjectIndexEnd());
989 for (MachineBasicBlock &BB : *MF)
990 for (MachineInstr &I : BB) {
991 // Skip lifetime markers. We'll remove them soon.
992 if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
993 I.getOpcode() == TargetOpcode::LIFETIME_END)
994 continue;
995
996 // Update the MachineMemOperand to use the new alloca.
997 for (MachineMemOperand *MMO : I.memoperands()) {
998 // We've replaced IR-level uses of the remapped allocas, so we only
999 // need to replace direct uses here.
1000 const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(MMO->getValue());
1001 if (!AI)
1002 continue;
1003
1004 auto It = Allocas.find(AI);
1005 if (It == Allocas.end())
1006 continue;
1007
1008 MMO->setValue(It->second);
1009 FixedMemOp++;
1010 }
1011
1012 // Update all of the machine instruction operands.
1013 for (MachineOperand &MO : I.operands()) {
1014 if (!MO.isFI())
1015 continue;
1016 int FromSlot = MO.getIndex();
1017
1018 // Don't touch arguments.
1019 if (FromSlot<0)
1020 continue;
1021
1022 // Only look at mapped slots.
1023 if (!SlotRemap.count(FromSlot))
1024 continue;
1025
1026 // In a debug build, check that the instruction that we are modifying is
1027 // inside the expected live range. If the instruction is not inside
1028 // the calculated range then it means that the alloca usage moved
1029 // outside of the lifetime markers, or that the user has a bug.
1030 // NOTE: Alloca address calculations which happen outside the lifetime
1031 // zone are okay, despite the fact that we don't have a good way
1032 // for validating all of the usages of the calculation.
1033#ifndef NDEBUG
1034 bool TouchesMemory = I.mayLoadOrStore();
1035 // If we *don't* protect the user from escaped allocas, don't bother
1036 // validating the instructions.
1037 if (!I.isDebugInstr() && TouchesMemory && ProtectFromEscapedAllocas) {
1038 SlotIndex Index = Indexes->getInstructionIndex(I);
1039 const LiveInterval *Interval = &*Intervals[FromSlot];
1040 assert(Interval->find(Index) != Interval->end() &&
1041 "Found instruction usage outside of live range.");
1042 }
1043#endif
1044
1045 // Fix the machine instructions.
1046 int ToSlot = SlotRemap[FromSlot];
1047 MO.setIndex(ToSlot);
1048 FixedInstr++;
1049 }
1050
1051 // We adjust AliasAnalysis information for merged stack slots.
1053 bool ReplaceMemOps = false;
1054 for (MachineMemOperand *MMO : I.memoperands()) {
1055 // Collect MachineMemOperands which reference
1056 // FixedStackPseudoSourceValues with old frame indices.
1058 MMO->getPseudoValue())) {
1059 int FI = FSV->getFrameIndex();
1060 auto To = SlotRemap.find(FI);
1061 if (To != SlotRemap.end())
1062 SSRefs[FI].push_back(MMO);
1063 }
1064
1065 // If this memory location can be a slot remapped here,
1066 // we remove AA information.
1067 bool MayHaveConflictingAAMD = false;
1068 if (MMO->getAAInfo()) {
1069 if (const Value *MMOV = MMO->getValue()) {
1070 SmallVector<Value *, 4> Objs;
1072
1073 if (Objs.empty())
1074 MayHaveConflictingAAMD = true;
1075 else
1076 for (Value *V : Objs) {
1077 // If this memory location comes from a known stack slot
1078 // that is not remapped, we continue checking.
1079 // Otherwise, we need to invalidate AA infomation.
1080 const AllocaInst *AI = dyn_cast_or_null<AllocaInst>(V);
1081 if (AI && MergedAllocas.count(AI)) {
1082 MayHaveConflictingAAMD = true;
1083 break;
1084 }
1085 }
1086 }
1087 }
1088 if (MayHaveConflictingAAMD) {
1089 NewMMOs.push_back(MF->getMachineMemOperand(MMO, AAMDNodes()));
1090 ReplaceMemOps = true;
1091 } else {
1092 NewMMOs.push_back(MMO);
1093 }
1094 }
1095
1096 // If any memory operand is updated, set memory references of
1097 // this instruction.
1098 if (ReplaceMemOps)
1099 I.setMemRefs(*MF, NewMMOs);
1100 }
1101
1102 // Rewrite MachineMemOperands that reference old frame indices.
1103 for (auto E : enumerate(SSRefs))
1104 if (!E.value().empty()) {
1105 const PseudoSourceValue *NewSV =
1106 MF->getPSVManager().getFixedStack(SlotRemap.find(E.index())->second);
1107 for (MachineMemOperand *Ref : E.value())
1108 Ref->setValue(NewSV);
1109 }
1110
1111 // Update the location of C++ catch objects for the MSVC personality routine.
1112 if (WinEHFuncInfo *EHInfo = MF->getWinEHFuncInfo())
1113 for (WinEHTryBlockMapEntry &TBME : EHInfo->TryBlockMap)
1114 for (WinEHHandlerType &H : TBME.HandlerArray)
1115 if (H.CatchObj.FrameIndex != std::numeric_limits<int>::max())
1116 if (auto It = SlotRemap.find(H.CatchObj.FrameIndex);
1117 It != SlotRemap.end())
1118 H.CatchObj.FrameIndex = It->second;
1119
1120 LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp << " machine memory operands.\n");
1121 LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg << " debug locations.\n");
1122 LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr << " machine instructions.\n");
1123 (void) FixedMemOp;
1124 (void) FixedDbg;
1125 (void) FixedInstr;
1126}
1127
1128void StackColoring::removeInvalidSlotRanges() {
1129 for (MachineBasicBlock &BB : *MF)
1130 for (MachineInstr &I : BB) {
1131 if (I.getOpcode() == TargetOpcode::LIFETIME_START ||
1132 I.getOpcode() == TargetOpcode::LIFETIME_END || I.isDebugInstr())
1133 continue;
1134
1135 // Some intervals are suspicious! In some cases we find address
1136 // calculations outside of the lifetime zone, but not actual memory
1137 // read or write. Memory accesses outside of the lifetime zone are a clear
1138 // violation, but address calculations are okay. This can happen when
1139 // GEPs are hoisted outside of the lifetime zone.
1140 // So, in here we only check instructions which can read or write memory.
1141 if (!I.mayLoad() && !I.mayStore())
1142 continue;
1143
1144 // Check all of the machine operands.
1145 for (const MachineOperand &MO : I.operands()) {
1146 if (!MO.isFI())
1147 continue;
1148
1149 int Slot = MO.getIndex();
1150
1151 if (Slot<0)
1152 continue;
1153
1154 if (Intervals[Slot]->empty())
1155 continue;
1156
1157 // Check that the used slot is inside the calculated lifetime range.
1158 // If it is not, warn about it and invalidate the range.
1159 LiveInterval *Interval = &*Intervals[Slot];
1160 SlotIndex Index = Indexes->getInstructionIndex(I);
1161 if (Interval->find(Index) == Interval->end()) {
1162 Interval->clear();
1163 LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot << "\n");
1164 EscapedAllocas++;
1165 }
1166 }
1167 }
1168}
1169
1170void StackColoring::expungeSlotMap(DenseMap<int, int> &SlotRemap,
1171 unsigned NumSlots) {
1172 // Expunge slot remap map.
1173 for (unsigned i=0; i < NumSlots; ++i) {
1174 // If we are remapping i
1175 if (auto It = SlotRemap.find(i); It != SlotRemap.end()) {
1176 int Target = It->second;
1177 // As long as our target is mapped to something else, follow it.
1178 while (true) {
1179 auto It = SlotRemap.find(Target);
1180 if (It == SlotRemap.end())
1181 break;
1182 Target = It->second;
1183 SlotRemap[i] = Target;
1184 }
1185 }
1186 }
1187}
1188
1189bool StackColoringLegacy::runOnMachineFunction(MachineFunction &MF) {
1190 StackColoring SC(&getAnalysis<SlotIndexesWrapperPass>().getSI());
1191 return SC.run(MF, skipFunction(MF.getFunction()));
1192}
1193
1196 StackColoring SC(&MFAM.getResult<SlotIndexesAnalysis>(MF));
1197 if (SC.run(MF))
1199 return PreservedAnalyses::all();
1200}
1201
1202bool StackColoring::run(MachineFunction &Func, bool OnlyRemoveMarkers) {
1203 LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1204 << "********** Function: " << Func.getName() << '\n');
1205 MF = &Func;
1206 MFI = &MF->getFrameInfo();
1207 BlockLiveness.clear();
1208 BasicBlockOrdering.clear();
1209 Markers.clear();
1210 Intervals.clear();
1211 LiveStarts.clear();
1212 VNInfoAllocator.Reset();
1213
1214 unsigned NumSlots = MFI->getObjectIndexEnd();
1215
1216 // If there are no stack slots then there are no markers to remove.
1217 if (!NumSlots)
1218 return false;
1219
1220 SmallVector<int, 8> SortedSlots;
1221 SortedSlots.reserve(NumSlots);
1222 Intervals.reserve(NumSlots);
1223 LiveStarts.resize(NumSlots);
1224
1225 unsigned NumMarkers = collectMarkers(NumSlots);
1226
1227 unsigned TotalSize = 0;
1228 LLVM_DEBUG(dbgs() << "Found " << NumMarkers << " markers and " << NumSlots
1229 << " slots\n");
1230 LLVM_DEBUG(dbgs() << "Slot structure:\n");
1231
1232 for (int i=0; i < MFI->getObjectIndexEnd(); ++i) {
1233 LLVM_DEBUG(dbgs() << "Slot #" << i << " - " << MFI->getObjectSize(i)
1234 << " bytes.\n");
1235 TotalSize += MFI->getObjectSize(i);
1236 }
1237
1238 LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize << " bytes\n\n");
1239
1240 // Don't continue because there are not enough lifetime markers, or the
1241 // stack is too small, or we are told not to optimize the slots, or
1242 // opt-bisect-limit is skipping this pass.
1243 if (NumMarkers < 2 || TotalSize < 16 || DisableColoring ||
1244 OnlyRemoveMarkers) {
1245 LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1246 return removeAllMarkers();
1247 }
1248
1249 for (unsigned i=0; i < NumSlots; ++i) {
1250 std::unique_ptr<LiveInterval> LI(new LiveInterval(i, 0));
1251 LI->getNextValue(Indexes->getZeroIndex(), VNInfoAllocator);
1252 Intervals.push_back(std::move(LI));
1253 SortedSlots.push_back(i);
1254 }
1255
1256 // Calculate the liveness of each block.
1257 calculateLocalLiveness();
1258 LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations << "\n");
1259 LLVM_DEBUG(dump());
1260
1261 // Propagate the liveness information.
1262 calculateLiveIntervals(NumSlots);
1263 LLVM_DEBUG(dumpIntervals());
1264
1265 // Search for allocas which are used outside of the declared lifetime
1266 // markers.
1268 removeInvalidSlotRanges();
1269
1270 // Maps old slots to new slots.
1271 DenseMap<int, int> SlotRemap;
1272 unsigned RemovedSlots = 0;
1273 unsigned ReducedSize = 0;
1274
1275 // Do not bother looking at empty intervals.
1276 for (unsigned I = 0; I < NumSlots; ++I) {
1277 if (Intervals[SortedSlots[I]]->empty())
1278 SortedSlots[I] = -1;
1279 }
1280
1281 // This is a simple greedy algorithm for merging allocas. First, sort the
1282 // slots, placing the largest slots first. Next, perform an n^2 scan and look
1283 // for disjoint slots. When you find disjoint slots, merge the smaller one
1284 // into the bigger one and update the live interval. Remove the small alloca
1285 // and continue.
1286
1287 // Sort the slots according to their size. Place unused slots at the end.
1288 // Use stable sort to guarantee deterministic code generation.
1289 llvm::stable_sort(SortedSlots, [this](int LHS, int RHS) {
1290 // We use -1 to denote a uninteresting slot. Place these slots at the end.
1291 if (LHS == -1)
1292 return false;
1293 if (RHS == -1)
1294 return true;
1295 // Sort according to size.
1296 return MFI->getObjectSize(LHS) > MFI->getObjectSize(RHS);
1297 });
1298
1299 for (auto &s : LiveStarts)
1300 llvm::sort(s);
1301
1302 bool Changed = true;
1303 while (Changed) {
1304 Changed = false;
1305 for (unsigned I = 0; I < NumSlots; ++I) {
1306 if (SortedSlots[I] == -1)
1307 continue;
1308
1309 for (unsigned J=I+1; J < NumSlots; ++J) {
1310 if (SortedSlots[J] == -1)
1311 continue;
1312
1313 int FirstSlot = SortedSlots[I];
1314 int SecondSlot = SortedSlots[J];
1315
1316 // Objects with different stack IDs cannot be merged.
1317 if (MFI->getStackID(FirstSlot) != MFI->getStackID(SecondSlot))
1318 continue;
1319
1320 LiveInterval *First = &*Intervals[FirstSlot];
1321 LiveInterval *Second = &*Intervals[SecondSlot];
1322 auto &FirstS = LiveStarts[FirstSlot];
1323 auto &SecondS = LiveStarts[SecondSlot];
1324 assert(!First->empty() && !Second->empty() && "Found an empty range");
1325
1326 // Merge disjoint slots. This is a little bit tricky - see the
1327 // Implementation Notes section for an explanation.
1328 if (!First->isLiveAtIndexes(SecondS) &&
1329 !Second->isLiveAtIndexes(FirstS)) {
1330 Changed = true;
1331 First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0));
1332
1333 int OldSize = FirstS.size();
1334 FirstS.append(SecondS.begin(), SecondS.end());
1335 auto Mid = FirstS.begin() + OldSize;
1336 std::inplace_merge(FirstS.begin(), Mid, FirstS.end());
1337
1338 SlotRemap[SecondSlot] = FirstSlot;
1339 SortedSlots[J] = -1;
1340 LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot << " and slots #"
1341 << SecondSlot << " together.\n");
1342 Align MaxAlignment = std::max(MFI->getObjectAlign(FirstSlot),
1343 MFI->getObjectAlign(SecondSlot));
1344
1345 assert(MFI->getObjectSize(FirstSlot) >=
1346 MFI->getObjectSize(SecondSlot) &&
1347 "Merging a small object into a larger one");
1348
1349 RemovedSlots+=1;
1350 ReducedSize += MFI->getObjectSize(SecondSlot);
1351 MFI->setObjectAlignment(FirstSlot, MaxAlignment);
1352 MFI->RemoveStackObject(SecondSlot);
1353 }
1354 }
1355 }
1356 }// While changed.
1357
1358 // Record statistics.
1359 StackSpaceSaved += ReducedSize;
1360 StackSlotMerged += RemovedSlots;
1361 LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots << " slots. Saved "
1362 << ReducedSize << " bytes\n");
1363
1364 // Scan the entire function and update all machine operands that use frame
1365 // indices to use the remapped frame index.
1366 if (!SlotRemap.empty()) {
1367 expungeSlotMap(SlotRemap, NumSlots);
1368 remapInstructions(SlotRemap);
1369 }
1370
1371 return removeAllMarkers();
1372}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file implements the BitVector class.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build generic depth first graph iterator.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
This defines the Use class.
#define I(x, y, z)
Definition MD5.cpp:57
#define H(x, y, z)
Definition MD5.cpp:56
std::pair< uint64_t, uint64_t > Interval
This file contains the declarations for metadata subclasses.
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
R600 Emit Clause Markers
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
static int getStartOrEndSlot(const MachineInstr &MI)
static cl::opt< bool > DisableColoring("no-stack-coloring", cl::init(false), cl::Hidden, cl::desc("Disable stack coloring"))
static cl::opt< bool > ProtectFromEscapedAllocas("protect-from-escaped-allocas", cl::init(false), cl::Hidden, cl::desc("Do not optimize lifetime zones that " "are broken"))
The user may write code that uses allocas outside of the declared lifetime zone.
static cl::opt< bool > LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use", cl::init(true), cl::Hidden, cl::desc("Treat stack lifetimes as starting on first use, not on START marker."))
Enable enhanced dataflow scheme for lifetime analysis (treat first use of stack slot as start of slot...
Merge disjoint stack slots
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:119
Value * RHS
Value * LHS
PointerType * getType() const
Overload to return most specific pointer type.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
bool test(unsigned Idx) const
Returns true if bit Idx is set.
Definition BitVector.h:482
BitVector & reset()
Reset all bits in the bitvector.
Definition BitVector.h:409
void resize(unsigned N, bool t=false)
Grow or shrink the bitvector.
Definition BitVector.h:355
void clear()
Removes all bits from the bitvector.
Definition BitVector.h:349
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
size_type size() const
Returns the number of bits in this bitvector.
Definition BitVector.h:178
bool subsetOf(const BitVector &RHS) const
Check if This is a subset of RHS.
Definition BitVector.h:573
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
bool empty() const
Definition DenseMap.h:173
iterator end()
Definition DenseMap.h:143
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI void insertAfter(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately after the specified instruction.
LLVM_ABI bool isLiveAtIndexes(ArrayRef< SlotIndex > Slots) const
bool empty() const
int getNumber() const
MachineBasicBlocks are uniquely numbered at the function level, unless they're not in a MachineFuncti...
iterator_range< pred_iterator > predecessors()
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
SSPLayoutKind getObjectSSPLayout(int ObjectIdx) const
const AllocaInst * getObjectAllocation(int ObjectIdx) const
Return the underlying Alloca of the specified stack object if it exists.
SSPLayoutKind
Stack Smashing Protection (SSP) rules require that vulnerable stack allocations are located close the...
@ SSPLK_LargeArray
Array or nested array >= SSP-buffer-size.
@ SSPLK_AddrOf
The address of this allocation is exposed and triggered protection.
@ SSPLK_None
Did not trigger a stack protector.
void setObjectSSPLayout(int ObjectIdx, SSPLayoutKind Kind)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
void RemoveStackObject(int ObjectIdx)
Remove or mark dead a statically sized stack object.
int getObjectIndexEnd() const
Return one past the maximum frame object index.
uint8_t getStackID(int ObjectIdx) const
void setObjectAlignment(int ObjectIdx, Align Alignment)
setObjectAlignment - Change the alignment of the specified stack object.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
bool exposesReturnsTwice() const
exposesReturnsTwice - Returns true if the function calls setjmp or any other similar functions with a...
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
Function & getFunction()
Return the LLVM function that this machine code represents.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
LLVM_ABI void print(raw_ostream &os) const
Print this index to the given raw_ostream.
SlotIndex getMBBEndIdx(unsigned Num) const
Returns the index past the last valid index in the given basic block.
SlotIndex getInstructionIndex(const MachineInstr &MI, bool IgnoreBundle=false) const
Returns the base index for the given instruction.
SlotIndex getMBBStartIdx(unsigned Num) const
Returns the first index in the given basic block number.
SlotIndex getZeroIndex()
Returns the zero index for this analysis.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
LLVM_ABI PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
BumpPtrAllocator Allocator
static LLVM_ABI void handleRAUW(Value *From, Value *To)
Definition Metadata.cpp:552
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
bool isUsedByMetadata() const
Return true if there is metadata referencing this value.
Definition Value.h:558
iterator_range< use_iterator > uses()
Definition Value.h:380
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
self_iterator getIterator()
Definition ilist_node.h:123
Changed
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
initializer< Ty > init(const Ty &Val)
DXILDebugInfoMap run(Module &M)
NodeAddr< UseNode * > Use
Definition RDFGraph.h:385
bool empty() const
Definition BasicBlock.h:101
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
void stable_sort(R &&Range)
Definition STLExtras.h:2115
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2553
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
LLVM_ABI char & StackColoringLegacyID
StackSlotColoring - This pass performs stack coloring and merging.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
iterator_range< df_iterator< T > > depth_first(const T &G)
LLVM_ABI Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
SmallVector< WinEHHandlerType, 1 > HandlerArray