SROA.cpp
1//===- SROA.cpp - Scalar Replacement Of Aggregates ------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9/// This transformation implements the well known scalar replacement of
10/// aggregates transformation. It tries to identify promotable elements of an
11/// aggregate alloca, and promote them to registers. It will also try to
12/// convert uses of an element (or set of elements) of an alloca into a vector
13/// or bitfield-style integer scalar if appropriate.
14///
15/// It works to do this with minimal slicing of the alloca so that regions
16/// which are merely transferred in and out of external memory remain unchanged
17/// and are not decomposed to scalar code.
18///
19/// Because this also performs alloca promotion, it can be thought of as also
20/// serving the purpose of SSA formation. The algorithm iterates on the
21/// function until all opportunities for promotion have been realized.
22///
23//===----------------------------------------------------------------------===//
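// Illustrative sketch (hypothetical IR, not taken from this file or its
// tests) of the transformation described above: an aggregate alloca whose
// fields are only stored and reloaded independently,
//
//   define i32 @example(i32 %a, i32 %b) {
//     %agg = alloca { i32, i32 }
//     %p0 = getelementptr inbounds { i32, i32 }, ptr %agg, i32 0, i32 0
//     %p1 = getelementptr inbounds { i32, i32 }, ptr %agg, i32 0, i32 1
//     store i32 %a, ptr %p0
//     store i32 %b, ptr %p1
//     %v0 = load i32, ptr %p0
//     %v1 = load i32, ptr %p1
//     %sum = add i32 %v0, %v1
//     ret i32 %sum
//   }
//
// is sliced into two i32 pieces, each of which is promoted to an SSA value,
// leaving roughly "%sum = add i32 %a, %b; ret i32 %sum" with no memory
// traffic at all.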
24
25#include "llvm/Transforms/Scalar/SROA.h"
26#include "llvm/ADT/APInt.h"
27#include "llvm/ADT/ArrayRef.h"
28#include "llvm/ADT/DenseMap.h"
29#include "llvm/ADT/MapVector.h"
31#include "llvm/ADT/STLExtras.h"
32#include "llvm/ADT/SetVector.h"
36#include "llvm/ADT/Statistic.h"
37#include "llvm/ADT/StringRef.h"
38#include "llvm/ADT/Twine.h"
39#include "llvm/ADT/iterator.h"
44#include "llvm/Analysis/Loads.h"
47#include "llvm/Config/llvm-config.h"
48#include "llvm/IR/BasicBlock.h"
49#include "llvm/IR/Constant.h"
51#include "llvm/IR/Constants.h"
52#include "llvm/IR/DIBuilder.h"
53#include "llvm/IR/DataLayout.h"
54#include "llvm/IR/DebugInfo.h"
57#include "llvm/IR/Dominators.h"
58#include "llvm/IR/Function.h"
59#include "llvm/IR/GlobalAlias.h"
60#include "llvm/IR/IRBuilder.h"
61#include "llvm/IR/InstVisitor.h"
62#include "llvm/IR/Instruction.h"
65#include "llvm/IR/Intrinsics.h"
66#include "llvm/IR/LLVMContext.h"
67#include "llvm/IR/Metadata.h"
68#include "llvm/IR/Module.h"
69#include "llvm/IR/Operator.h"
70#include "llvm/IR/PassManager.h"
71#include "llvm/IR/Type.h"
72#include "llvm/IR/Use.h"
73#include "llvm/IR/User.h"
74#include "llvm/IR/Value.h"
75#include "llvm/IR/ValueHandle.h"
77#include "llvm/Pass.h"
81#include "llvm/Support/Debug.h"
89#include <algorithm>
90#include <cassert>
91#include <cstddef>
92#include <cstdint>
93#include <cstring>
94#include <iterator>
95#include <queue>
96#include <string>
97#include <tuple>
98#include <utility>
99#include <variant>
100#include <vector>
101
102using namespace llvm;
103
104#define DEBUG_TYPE "sroa"
105
106STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
107STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
108STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
109STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
110STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
111STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
112STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
113STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
114STATISTIC(NumLoadsPredicated,
115 "Number of loads rewritten into predicated loads to allow promotion");
116STATISTIC(
117 NumStoresPredicated,
118 "Number of stores rewritten into predicated stores to allow promotion");
119STATISTIC(NumDeleted, "Number of instructions deleted");
120STATISTIC(NumVectorized, "Number of vectorized aggregates");
121
122namespace llvm {
123/// Disable running mem2reg during SROA in order to test or debug SROA.
124static cl::opt<bool> SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false),
125 cl::Hidden);
127} // namespace llvm
128
129namespace {
130
131class AllocaSliceRewriter;
132class AllocaSlices;
133class Partition;
134
135class SelectHandSpeculativity {
136 unsigned char Storage = 0; // None are speculatable by default.
137 using TrueVal = Bitfield::Element<bool, 0, 1>; // Low 0'th bit.
138 using FalseVal = Bitfield::Element<bool, 1, 1>; // Low 1'th bit.
139public:
140 SelectHandSpeculativity() = default;
141 SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
142 bool isSpeculatable(bool isTrueVal) const;
143 bool areAllSpeculatable() const;
144 bool areAnySpeculatable() const;
145 bool areNoneSpeculatable() const;
146 // For interop as int half of PointerIntPair.
147 explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
148 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
149};
150static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
151
152using PossiblySpeculatableLoad =
153 PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
154using UnspeculatableStore = StoreInst *;
155using RewriteableMemOp =
156 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
157using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
158
159/// An optimization pass providing Scalar Replacement of Aggregates.
160///
161/// This pass takes allocations which can be completely analyzed (that is, they
162/// don't escape) and tries to turn them into scalar SSA values. There are
163/// a few steps to this process.
164///
165/// 1) It takes allocations of aggregates and analyzes the ways in which they
166/// are used to try to split them into smaller allocations, ideally of
167/// a single scalar data type. It will split up memcpy and memset accesses
168/// as necessary and try to isolate individual scalar accesses.
169/// 2) It will transform accesses into forms which are suitable for SSA value
170/// promotion. This can be replacing a memset with a scalar store of an
171/// integer value, or it can involve speculating operations on a PHI or
172/// select to be a PHI or select of the results.
173/// 3) Finally, this will try to detect a pattern of accesses which map cleanly
174/// onto insert and extract operations on a vector value, and convert them to
175/// this form. By doing so, it will enable promotion of vector aggregates to
176/// SSA vector values.
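/// Illustrative example for step (3) above (hypothetical IR, not taken from
/// this file): a scalar store into one lane of a vector-typed alloca,
///   %a = alloca <4 x float>
///   store <4 x float> %v, ptr %a
///   %p2 = getelementptr inbounds float, ptr %a, i64 2
///   store float %f, ptr %p2
///   %r = load <4 x float>, ptr %a
/// can be rewritten as an element insert, after which the alloca promotes to
/// a plain SSA vector value:
///   %r = insertelement <4 x float> %v, float %f, i32 2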
177class SROA {
178 LLVMContext *const C;
179 DomTreeUpdater *const DTU;
180 AssumptionCache *const AC;
181 const bool PreserveCFG;
182
183 /// Worklist of alloca instructions to simplify.
184 ///
185 /// Each alloca in the function is added to this. Each new alloca formed gets
186 /// added to it as well to recursively simplify unless that alloca can be
187 /// directly promoted. Finally, each time we rewrite a use of an alloca other
188/// than the one being actively rewritten, we add it back onto the list if not
189 /// already present to ensure it is re-visited.
190 SmallSetVector<AllocaInst *, 16> Worklist;
191
192 /// A collection of instructions to delete.
193 /// We try to batch deletions to simplify code and make things a bit more
194/// efficient. We also make sure there are no dangling pointers.
195 SmallVector<WeakVH, 8> DeadInsts;
196
197 /// Post-promotion worklist.
198 ///
199 /// Sometimes we discover an alloca which has a high probability of becoming
200 /// viable for SROA after a round of promotion takes place. In those cases,
201 /// the alloca is enqueued here for re-processing.
202 ///
203 /// Note that we have to be very careful to clear allocas out of this list in
204 /// the event they are deleted.
205 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
206
207 /// A collection of alloca instructions we can directly promote.
208 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
209 SmallPtrSet<AllocaInst *, 16>, 16>
210 PromotableAllocas;
211
212 /// A worklist of PHIs to speculate prior to promoting allocas.
213 ///
214 /// All of these PHIs have been checked for the safety of speculation and by
215 /// being speculated will allow promoting allocas currently in the promotable
216 /// queue.
217 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
218
219 /// A worklist of select instructions to rewrite prior to promoting
220 /// allocas.
221 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
222
223 /// Select instructions that use an alloca and are subsequently loaded can be
224 /// rewritten to load both input pointers and then select between the result,
225 /// allowing the load of the alloca to be promoted.
226 /// From this:
227 /// %P2 = select i1 %cond, ptr %Alloca, ptr %Other
228 /// %V = load <type>, ptr %P2
229 /// to:
230 /// %V1 = load <type>, ptr %Alloca -> will be mem2reg'd
231 /// %V2 = load <type>, ptr %Other
232 /// %V = select i1 %cond, <type> %V1, <type> %V2
233 ///
234 /// We can do this to a select if its only uses are loads
235 /// and if either the operand to the select can be loaded unconditionally,
236 /// or if we are allowed to perform CFG modifications.
237 /// If the load is reached through an intervening bitcast with a single
238 /// use, the promotion is still allowed.
239 static std::optional<RewriteableMemOps>
240 isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
241
242public:
243 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
244 SROAOptions PreserveCFG_)
245 : C(C), DTU(DTU), AC(AC),
246 PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
247
248 /// Main run method used by both the SROAPass and by the legacy pass.
249 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runSROA(Function &F);
250
251private:
252 friend class AllocaSliceRewriter;
253
254 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
255 std::pair<AllocaInst *, uint64_t>
256 rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
257 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
258 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
259 std::pair<bool /*Changed*/, bool /*CFGChanged*/> runOnAlloca(AllocaInst &AI);
260 void clobberUse(Use &U);
261 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
262 bool promoteAllocas();
263};
264
265} // end anonymous namespace
266
267/// Calculate the fragment of a variable to use when slicing a store
268/// based on the slice dimensions, existing fragment, and base storage
269/// fragment.
270/// Results:
271/// UseFrag - Use Target as the new fragment.
272/// UseNoFrag - The new slice already covers the whole variable.
273/// Skip - The new alloca slice doesn't include this variable.
274/// FIXME: Can we use calculateFragmentIntersect instead?
275namespace {
276enum FragCalcResult { UseFrag, UseNoFrag, Skip };
277}
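// Worked example of the result values above (assumed sizes, purely
// illustrative): for a 64-bit variable with no pre-existing fragment, a slice
// covering bits [0, 32) produces Target = {OffsetInBits: 0, SizeInBits: 32}
// and returns UseFrag; a slice covering all 64 bits matches the whole
// variable and returns UseNoFrag; and if the existing expression already
// carries a 32-bit fragment at offset 0, a slice for bits [32, 64) falls
// outside it and returns Skip.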
278static FragCalcResult
279calculateFragment(DILocalVariable *Variable,
280 uint64_t NewStorageSliceOffsetInBits,
281 uint64_t NewStorageSliceSizeInBits,
282 std::optional<DIExpression::FragmentInfo> StorageFragment,
283 std::optional<DIExpression::FragmentInfo> CurrentFragment,
284 DIExpression::FragmentInfo &Target) {
285 // If the base storage describes part of the variable apply the offset and
286 // the size constraint.
287 if (StorageFragment) {
288 Target.SizeInBits =
289 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
290 Target.OffsetInBits =
291 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
292 } else {
293 Target.SizeInBits = NewStorageSliceSizeInBits;
294 Target.OffsetInBits = NewStorageSliceOffsetInBits;
295 }
296
297 // If this slice extracts the entirety of an independent variable from a
298 // larger alloca, do not produce a fragment expression, as the variable is
299 // not fragmented.
300 if (!CurrentFragment) {
301 if (auto Size = Variable->getSizeInBits()) {
302 // Treat the current fragment as covering the whole variable.
303 CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
304 if (Target == CurrentFragment)
305 return UseNoFrag;
306 }
307 }
308
309 // No additional work to do if there isn't a fragment already, or there is
310 // but it already exactly describes the new assignment.
311 if (!CurrentFragment || *CurrentFragment == Target)
312 return UseFrag;
313
314 // Reject the target fragment if it doesn't fit wholly within the current
315 // fragment. TODO: We could instead chop up the target to fit in the case of
316 // a partial overlap.
317 if (Target.startInBits() < CurrentFragment->startInBits() ||
318 Target.endInBits() > CurrentFragment->endInBits())
319 return Skip;
320
321 // Target fits within the current fragment, return it.
322 return UseFrag;
323}
324
325static DebugVariable getAggregateVariable(DbgVariableRecord *DVR) {
326 return DebugVariable(DVR->getVariable(), std::nullopt,
327 DVR->getDebugLoc().getInlinedAt());
328}
329
330/// Find linked dbg.assign and generate a new one with the correct
331/// FragmentInfo. Link Inst to the new dbg.assign. If Value is nullptr the
332/// value component is copied from the old dbg.assign to the new.
333/// \param OldAlloca Alloca for the variable before splitting.
334/// \param IsSplit True if the store (not necessarily alloca)
335/// is being split.
336/// \param OldAllocaOffsetInBits Offset of the slice taken from OldAlloca.
337/// \param SliceSizeInBits New number of bits being written to.
338/// \param OldInst Instruction that is being split.
339/// \param Inst New instruction performing this part of the
340/// split store.
341/// \param Dest Store destination.
342/// \param Value Stored value.
343/// \param DL Datalayout.
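///
/// Illustrative example (assumed values, not from a test): splitting a
/// 64-bit store that is linked to a dbg_assign of a 64-bit variable into two
/// 32-bit stores produces two new dbg_assign records whose expressions carry
/// DW_OP_LLVM_fragment 0,32 and DW_OP_LLVM_fragment 32,32 respectively, each
/// linked to its half of the split store through a fresh DIAssignID.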
344static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
345 uint64_t OldAllocaOffsetInBits,
346 uint64_t SliceSizeInBits, Instruction *OldInst,
347 Instruction *Inst, Value *Dest, Value *Value,
348 const DataLayout &DL) {
349 // If we want allocas to be migrated using this helper then we need to ensure
350 // that the BaseFragments map code still works. A simple solution would be
351 // to choose to always clone alloca dbg_assigns (rather than sometimes
352 // "stealing" them).
353 assert(!isa<AllocaInst>(Inst) && "Unexpected alloca");
354
355 auto DVRAssignMarkerRange = at::getDVRAssignmentMarkers(OldInst);
356 // Nothing to do if OldInst has no linked dbg.assign intrinsics.
357 if (DVRAssignMarkerRange.empty())
358 return;
359
360 LLVM_DEBUG(dbgs() << " migrateDebugInfo\n");
361 LLVM_DEBUG(dbgs() << " OldAlloca: " << *OldAlloca << "\n");
362 LLVM_DEBUG(dbgs() << " IsSplit: " << IsSplit << "\n");
363 LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
364 << "\n");
365 LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");
366 LLVM_DEBUG(dbgs() << " OldInst: " << *OldInst << "\n");
367 LLVM_DEBUG(dbgs() << " Inst: " << *Inst << "\n");
368 LLVM_DEBUG(dbgs() << " Dest: " << *Dest << "\n");
369 if (Value)
370 LLVM_DEBUG(dbgs() << " Value: " << *Value << "\n");
371
372 /// Map of aggregate variables to their fragment associated with OldAlloca.
373 DenseMap<DebugVariable, std::optional<DIExpression::FragmentInfo>>
374 BaseFragments;
375 for (auto *DVR : at::getDVRAssignmentMarkers(OldAlloca))
376 BaseFragments[getAggregateVariable(DVR)] =
377 DVR->getExpression()->getFragmentInfo();
378
379 // The new inst needs a DIAssignID unique metadata tag (if OldInst has
380 // one). It shouldn't already have one: assert this assumption.
381 assert(!Inst->getMetadata(LLVMContext::MD_DIAssignID));
382 DIAssignID *NewID = nullptr;
383 auto &Ctx = Inst->getContext();
384 DIBuilder DIB(*OldInst->getModule(), /*AllowUnresolved*/ false);
385 assert(OldAlloca->isStaticAlloca());
386
387 auto MigrateDbgAssign = [&](DbgVariableRecord *DbgAssign) {
388 LLVM_DEBUG(dbgs() << " existing dbg.assign is: " << *DbgAssign
389 << "\n");
390 auto *Expr = DbgAssign->getExpression();
391 bool SetKillLocation = false;
392
393 if (IsSplit) {
394 std::optional<DIExpression::FragmentInfo> BaseFragment;
395 {
396 auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
397 if (R == BaseFragments.end())
398 return;
399 BaseFragment = R->second;
400 }
401 std::optional<DIExpression::FragmentInfo> CurrentFragment =
402 Expr->getFragmentInfo();
403 DIExpression::FragmentInfo NewFragment;
404 FragCalcResult Result = calculateFragment(
405 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
406 BaseFragment, CurrentFragment, NewFragment);
407
408 if (Result == Skip)
409 return;
410 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
411 if (CurrentFragment) {
412 // Rewrite NewFragment to be relative to the existing one (this is
413 // what createFragmentExpression wants). CalculateFragment has
414 // already resolved the size for us. FIXME: Should it return the
415 // relative fragment too?
416 NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
417 }
418 // Add the new fragment info to the existing expression if possible.
419 if (auto E = DIExpression::createFragmentExpression(
420 Expr, NewFragment.OffsetInBits, NewFragment.SizeInBits)) {
421 Expr = *E;
422 } else {
423 // Otherwise, add the new fragment info to an empty expression and
424 // discard the value component of this dbg.assign as the value cannot
425 // be computed with the new fragment.
426 Expr = *DIExpression::createFragmentExpression(
427 DIExpression::get(Expr->getContext(), {}),
428 NewFragment.OffsetInBits, NewFragment.SizeInBits);
429 SetKillLocation = true;
430 }
431 }
432 }
433
434 // If we haven't created a DIAssignID ID do that now and attach it to Inst.
435 if (!NewID) {
436 NewID = DIAssignID::getDistinct(Ctx);
437 Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
438 }
439
440 DbgVariableRecord *NewAssign;
441 if (IsSplit) {
442 ::Value *NewValue = Value ? Value : DbgAssign->getValue();
443 NewAssign = cast<DbgVariableRecord>(cast<DbgRecord *>(
444 DIB.insertDbgAssign(Inst, NewValue, DbgAssign->getVariable(), Expr,
445 Dest, DIExpression::get(Expr->getContext(), {}),
446 DbgAssign->getDebugLoc())));
447 } else {
448 // The store is not split, simply steal the existing dbg_assign.
449 NewAssign = DbgAssign;
450 NewAssign->setAssignId(NewID); // FIXME: Can we avoid generating new IDs?
451 NewAssign->setAddress(Dest);
452 if (Value)
453 NewAssign->replaceVariableLocationOp(0u, Value);
454 assert(Expr == NewAssign->getExpression());
455 }
456
457 // If we've updated the value but the original dbg.assign has an arglist
458 // then kill it now - we can't use the requested new value.
459 // We can't replace the DIArgList with the new value as it'd leave
460 // the DIExpression in an invalid state (DW_OP_LLVM_arg operands without
461 // an arglist). And we can't keep the DIArgList in case the linked store
462 // is being split - in which case the DIArgList + expression may no longer
463 // be computing the correct value.
464 // This should be a very rare situation as it requires the value being
465 // stored to differ from the dbg.assign (i.e., the value has been
466 // represented differently in the debug intrinsic for some reason).
467 SetKillLocation |=
468 Value && (DbgAssign->hasArgList() ||
469 !DbgAssign->getExpression()->isSingleLocationExpression());
470 if (SetKillLocation)
471 NewAssign->setKillLocation();
472
473 // We could use more precision here at the cost of some additional (code)
474 // complexity - if the original dbg.assign was adjacent to its store, we
475 // could position this new dbg.assign adjacent to its store rather than the
476 // old dbg.assign. That would result in interleaved dbg.assigns rather than
477 // what we get now:
478 // split store !1
479 // split store !2
480 // dbg.assign !1
481 // dbg.assign !2
482 // This (current behaviour) results in debug assignments being
483 // noted as slightly offset (in code) from the store. In practice this
484 // should have little effect on the debugging experience due to the fact
485 // that all the split stores should get the same line number.
486 if (NewAssign != DbgAssign) {
487 NewAssign->moveBefore(DbgAssign->getIterator());
488 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
489 }
490 LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
491 };
492
493 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
494}
495
496namespace {
497
498/// A custom IRBuilder inserter which prefixes all names, but only in
499/// Assert builds.
500class IRBuilderPrefixedInserter final : public IRBuilderDefaultInserter {
501 std::string Prefix;
502
503 Twine getNameWithPrefix(const Twine &Name) const {
504 return Name.isTriviallyEmpty() ? Name : Prefix + Name;
505 }
506
507public:
508 void SetNamePrefix(const Twine &P) { Prefix = P.str(); }
509
510 void InsertHelper(Instruction *I, const Twine &Name,
511 BasicBlock::iterator InsertPt) const override {
512 IRBuilderDefaultInserter::InsertHelper(I, getNameWithPrefix(Name),
513 InsertPt);
514 }
515};
516
517/// Provide a type for IRBuilder that drops names in release builds.
518using IRBuilderTy = IRBuilder<ConstantFolder, IRBuilderPrefixedInserter>;
519
520/// A used slice of an alloca.
521///
522/// This structure represents a slice of an alloca used by some instruction. It
523/// stores both the begin and end offsets of this use, a pointer to the use
524/// itself, and a flag indicating whether we can classify the use as splittable
525/// or not when forming partitions of the alloca.
526class Slice {
527 /// The beginning offset of the range.
528 uint64_t BeginOffset = 0;
529
530 /// The ending offset, not included in the range.
531 uint64_t EndOffset = 0;
532
533 /// Storage for both the use of this slice and whether it can be
534 /// split.
535 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
536
537public:
538 Slice() = default;
539
540 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable,
541 Value *ProtectedFieldDisc)
542 : BeginOffset(BeginOffset), EndOffset(EndOffset),
543 UseAndIsSplittable(U, IsSplittable),
544 ProtectedFieldDisc(ProtectedFieldDisc) {}
545
546 uint64_t beginOffset() const { return BeginOffset; }
547 uint64_t endOffset() const { return EndOffset; }
548
549 bool isSplittable() const { return UseAndIsSplittable.getInt(); }
550 void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
551
552 Use *getUse() const { return UseAndIsSplittable.getPointer(); }
553
554 bool isDead() const { return getUse() == nullptr; }
555 void kill() { UseAndIsSplittable.setPointer(nullptr); }
556
557 // When this access is via an llvm.protected.field.ptr intrinsic, contains
558 // the second argument to the intrinsic, the discriminator.
559 Value *ProtectedFieldDisc;
560
561 /// Support for ordering ranges.
562 ///
563 /// This provides an ordering over ranges such that start offsets are
564 /// always increasing, and within equal start offsets, the end offsets are
565 /// decreasing. Thus the spanning range comes first in a cluster with the
566 /// same start position.
567 bool operator<(const Slice &RHS) const {
568 if (beginOffset() < RHS.beginOffset())
569 return true;
570 if (beginOffset() > RHS.beginOffset())
571 return false;
572 if (isSplittable() != RHS.isSplittable())
573 return !isSplittable();
574 if (endOffset() > RHS.endOffset())
575 return true;
576 return false;
577 }
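  // Worked example of this ordering (illustrative offsets): the slices
  //   [0,16) unsplittable, [0,16) splittable, [0,8) splittable, [4,8)
  // are already sorted. Equal begin offsets place the unsplittable slice
  // first, then order by decreasing end offset, so a spanning slice always
  // leads its cluster.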
578
579 /// Support comparison with a single offset to allow binary searches.
580 [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
581 return LHS.beginOffset() < RHSOffset;
582 }
583 [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
584 return LHSOffset < RHS.beginOffset();
585 }
586
587 bool operator==(const Slice &RHS) const {
588 return isSplittable() == RHS.isSplittable() &&
589 beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
590 }
591 bool operator!=(const Slice &RHS) const { return !operator==(RHS); }
592};
593
594/// Representation of the alloca slices.
595///
596/// This class represents the slices of an alloca which are formed by its
597/// various uses. If a pointer escapes, we can't fully build a representation
598/// for the slices used and we reflect that in this structure. The uses are
599/// stored, sorted by increasing beginning offset and with unsplittable slices
600/// starting at a particular offset before splittable slices.
601class AllocaSlices {
602public:
603 /// Construct the slices of a particular alloca.
604 AllocaSlices(const DataLayout &DL, AllocaInst &AI);
605
606 /// Test whether a pointer to the allocation escapes our analysis.
607 ///
608 /// If this is true, the slices are never fully built and should be
609 /// ignored.
610 bool isEscaped() const { return PointerEscapingInstr; }
611 bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }
612
613 /// Support for iterating over the slices.
614 /// @{
615 using iterator = SmallVectorImpl<Slice>::iterator;
616 using range = iterator_range<iterator>;
617
618 iterator begin() { return Slices.begin(); }
619 iterator end() { return Slices.end(); }
620
621 using const_iterator = SmallVectorImpl<Slice>::const_iterator;
622 using const_range = iterator_range<const_iterator>;
623
624 const_iterator begin() const { return Slices.begin(); }
625 const_iterator end() const { return Slices.end(); }
626 /// @}
627
628 /// Erase a range of slices.
629 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
630
631 /// Insert new slices for this alloca.
632 ///
633 /// This moves the slices into the alloca's slices collection, and re-sorts
634 /// everything so that the usual ordering properties of the alloca's slices
635 /// hold.
636 void insert(ArrayRef<Slice> NewSlices) {
637 int OldSize = Slices.size();
638 Slices.append(NewSlices.begin(), NewSlices.end());
639 auto SliceI = Slices.begin() + OldSize;
640 std::stable_sort(SliceI, Slices.end());
641 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
642 }
643
644 // Forward declare the iterator and range accessor for walking the
645 // partitions.
646 class partition_iterator;
647 iterator_range<partition_iterator> partitions();
648
649 /// Access the dead users for this alloca.
650 ArrayRef<Instruction *> getDeadUsers() const { return DeadUsers; }
651
652 /// Access the users for this alloca that are llvm.protected.field.ptr
653 /// intrinsics.
654 ArrayRef<IntrinsicInst *> getPFPUsers() const { return PFPUsers; }
655
656 /// Access Uses that should be dropped if the alloca is promotable.
657 ArrayRef<Use *> getDeadUsesIfPromotable() const {
658 return DeadUseIfPromotable;
659 }
660
661 /// Access the dead operands referring to this alloca.
662 ///
663 /// These are operands which cannot actually be used to refer to the
664 /// alloca as they are outside its range and the user doesn't correct for
665 /// that. These mostly consist of PHI node inputs and the like which we just
666 /// need to replace with undef.
667 ArrayRef<Use *> getDeadOperands() const { return DeadOperands; }
668
669#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
670 void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
671 void printSlice(raw_ostream &OS, const_iterator I,
672 StringRef Indent = " ") const;
673 void printUse(raw_ostream &OS, const_iterator I,
674 StringRef Indent = " ") const;
675 void print(raw_ostream &OS) const;
676 void dump(const_iterator I) const;
677 void dump() const;
678#endif
679
680private:
681 template <typename DerivedT, typename RetT = void> class BuilderBase;
682 class SliceBuilder;
683
684 friend class AllocaSlices::SliceBuilder;
685
686#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
687 /// Handle to alloca instruction to simplify method interfaces.
688 AllocaInst &AI;
689#endif
690
691 /// The instruction responsible for this alloca not having a known set
692 /// of slices.
693 ///
694 /// When an instruction (potentially) escapes the pointer to the alloca, we
695 /// store a pointer to that here and abort trying to form slices of the
696 /// alloca. This will be null if the alloca slices are analyzed successfully.
697 Instruction *PointerEscapingInstr;
698 Instruction *PointerEscapingInstrReadOnly;
699
700 /// The slices of the alloca.
701 ///
702 /// We store a vector of the slices formed by uses of the alloca here. This
703 /// vector is sorted by increasing begin offset, and then the unsplittable
704 /// slices before the splittable ones. See the Slice inner class for more
705 /// details.
706 SmallVector<Slice, 8> Slices;
707
708 /// Instructions which will become dead if we rewrite the alloca.
709 ///
710 /// Note that these are not separated by slice. This is because we expect an
711 /// alloca to be completely rewritten or not rewritten at all. If rewritten,
712 /// all these instructions can simply be removed and replaced with poison as
713 /// they come from outside of the allocated space.
714 SmallVector<Instruction *, 8> DeadUsers;
715
716 /// Users that are llvm.protected.field.ptr intrinsics. These will be RAUW'd
717 /// to their first argument if we rewrite the alloca.
718 SmallVector<IntrinsicInst *, 8> PFPUsers;
719
720 /// Uses which will become dead if can promote the alloca.
721 SmallVector<Use *, 8> DeadUseIfPromotable;
722
723 /// Operands which will become dead if we rewrite the alloca.
724 ///
725 /// These are operands that in their particular use can be replaced with
726 /// poison when we rewrite the alloca. These show up in out-of-bounds inputs
727 /// to PHI nodes and the like. They aren't entirely dead (there might be
728 /// a GEP back into the bounds using it elsewhere) and nor is the PHI, but we
729 /// want to swap this particular input for poison to simplify the use lists of
730 /// the alloca.
731 SmallVector<Use *, 8> DeadOperands;
732};
733
734/// A partition of the slices.
735///
736/// An ephemeral representation for a range of slices which can be viewed as
737/// a partition of the alloca. This range represents a span of the alloca's
738/// memory which cannot be split, and provides access to all of the slices
739/// overlapping some part of the partition.
740///
741/// Objects of this type are produced by traversing the alloca's slices, but
742/// are only ephemeral and not persistent.
743class Partition {
744private:
745 friend class AllocaSlices;
746 friend class AllocaSlices::partition_iterator;
747
748 using iterator = AllocaSlices::iterator;
749
750 /// The beginning and ending offsets of the alloca for this
751 /// partition.
752 uint64_t BeginOffset = 0, EndOffset = 0;
753
754 /// The start and end iterators of this partition.
755 iterator SI, SJ;
756
757 /// A collection of split slice tails overlapping the partition.
758 SmallVector<Slice *, 4> SplitTails;
759
760 /// Raw constructor builds an empty partition starting and ending at
761 /// the given iterator.
762 Partition(iterator SI) : SI(SI), SJ(SI) {}
763
764public:
765 /// The start offset of this partition.
766 ///
767 /// All of the contained slices start at or after this offset.
768 uint64_t beginOffset() const { return BeginOffset; }
769
770 /// The end offset of this partition.
771 ///
772 /// All of the contained slices end at or before this offset.
773 uint64_t endOffset() const { return EndOffset; }
774
775 /// The size of the partition.
776 ///
777 /// Note that this can never be zero.
778 uint64_t size() const {
779 assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
780 return EndOffset - BeginOffset;
781 }
782
783 /// Test whether this partition contains no slices, and merely spans
784 /// a region occupied by split slices.
785 bool empty() const { return SI == SJ; }
786
787 /// \name Iterate slices that start within the partition.
788 /// These may be splittable or unsplittable. They have a begin offset >= the
789 /// partition begin offset.
790 /// @{
791 // FIXME: We should probably define a "concat_iterator" helper and use that
792 // to stitch together pointee_iterators over the split tails and the
793 // contiguous iterators of the partition. That would give a much nicer
794 // interface here. We could then additionally expose filtered iterators for
795 // split, unsplit, and unsplittable slices based on the usage patterns.
796 iterator begin() const { return SI; }
797 iterator end() const { return SJ; }
798 /// @}
799
800 /// Get the sequence of split slice tails.
801 ///
802 /// These tails are of slices which start before this partition but are
803 /// split and overlap into the partition. We accumulate these while forming
804 /// partitions.
805 ArrayRef<Slice *> splitSliceTails() const { return SplitTails; }
806};
807
808} // end anonymous namespace
809
810/// An iterator over partitions of the alloca's slices.
811///
812/// This iterator implements the core algorithm for partitioning the alloca's
813/// slices. It is a forward iterator as we don't support backtracking for
814/// efficiency reasons, and re-use a single storage area to maintain the
815/// current set of split slices.
816///
817/// It is templated on the slice iterator type to use so that it can operate
818/// with either const or non-const slice iterators.
819class AllocaSlices::partition_iterator
820 : public iterator_facade_base<partition_iterator, std::forward_iterator_tag,
821 Partition> {
822 friend class AllocaSlices;
823
824 /// Most of the state for walking the partitions is held in a class
825 /// with a nice interface for examining them.
826 Partition P;
827
828 /// We need to keep the end of the slices to know when to stop.
829 AllocaSlices::iterator SE;
830
831 /// We also need to keep track of the maximum split end offset seen.
832 /// FIXME: Do we really?
833 uint64_t MaxSplitSliceEndOffset = 0;
834
835 /// Sets the partition to be empty at given iterator, and sets the
836 /// end iterator.
837 partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)
838 : P(SI), SE(SE) {
839 // If not already at the end, advance our state to form the initial
840 // partition.
841 if (SI != SE)
842 advance();
843 }
844
845 /// Advance the iterator to the next partition.
846 ///
847 /// Requires that the iterator not be at the end of the slices.
848 void advance() {
849 assert((P.SI != SE || !P.SplitTails.empty()) &&
850 "Cannot advance past the end of the slices!");
851
852 // Clear out any split uses which have ended.
853 if (!P.SplitTails.empty()) {
854 if (P.EndOffset >= MaxSplitSliceEndOffset) {
855 // If we've finished all splits, this is easy.
856 P.SplitTails.clear();
857 MaxSplitSliceEndOffset = 0;
858 } else {
859 // Remove the uses which have ended in the prior partition. This
860 // cannot change the max split slice end because we just checked that
861 // the prior partition ended prior to that max.
862 llvm::erase_if(P.SplitTails,
863 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
864 assert(llvm::any_of(P.SplitTails,
865 [&](Slice *S) {
866 return S->endOffset() == MaxSplitSliceEndOffset;
867 }) &&
868 "Could not find the current max split slice offset!");
869 assert(llvm::all_of(P.SplitTails,
870 [&](Slice *S) {
871 return S->endOffset() <= MaxSplitSliceEndOffset;
872 }) &&
873 "Max split slice end offset is not actually the max!");
874 }
875 }
876
877 // If P.SI is already at the end, then we've cleared the split tail and
878 // now have an end iterator.
879 if (P.SI == SE) {
880 assert(P.SplitTails.empty() && "Failed to clear the split slices!");
881 return;
882 }
883
884 // If we had a non-empty partition previously, set up the state for
885 // subsequent partitions.
886 if (P.SI != P.SJ) {
887 // Accumulate all the splittable slices which started in the old
888 // partition into the split list.
889 for (Slice &S : P)
890 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
891 P.SplitTails.push_back(&S);
892 MaxSplitSliceEndOffset =
893 std::max(S.endOffset(), MaxSplitSliceEndOffset);
894 }
895
896 // Start from the end of the previous partition.
897 P.SI = P.SJ;
898
899 // If P.SI is now at the end, we at most have a tail of split slices.
900 if (P.SI == SE) {
901 P.BeginOffset = P.EndOffset;
902 P.EndOffset = MaxSplitSliceEndOffset;
903 return;
904 }
905
906 // If we have split slices and the next slice is after a gap and is
907 // not splittable immediately form an empty partition for the split
908 // slices up until the next slice begins.
909 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
910 !P.SI->isSplittable()) {
911 P.BeginOffset = P.EndOffset;
912 P.EndOffset = P.SI->beginOffset();
913 return;
914 }
915 }
916
917 // OK, we need to consume new slices. Set the end offset based on the
918 // current slice, and step SJ past it. The beginning offset of the
919 // partition is the beginning offset of the next slice unless we have
920 // pre-existing split slices that are continuing, in which case we begin
921 // at the prior end offset.
922 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
923 P.EndOffset = P.SI->endOffset();
924 ++P.SJ;
925
926 // There are two strategies to form a partition based on whether the
927 // partition starts with an unsplittable slice or a splittable slice.
928 if (!P.SI->isSplittable()) {
929 // When we're forming an unsplittable region, it must always start at
930 // the first slice and will extend through its end.
931 assert(P.BeginOffset == P.SI->beginOffset());
932
933 // Form a partition including all of the overlapping slices with this
934 // unsplittable slice.
935 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
936 if (!P.SJ->isSplittable())
937 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
938 ++P.SJ;
939 }
940
941 // We have a partition across a set of overlapping unsplittable
942 // partitions.
943 return;
944 }
945
946 // If we're starting with a splittable slice, then we need to form
947 // a synthetic partition spanning it and any other overlapping splittable
948 // slices.
949 assert(P.SI->isSplittable() && "Forming a splittable partition!");
950
951 // Collect all of the overlapping splittable slices.
952 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
953 P.SJ->isSplittable()) {
954 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
955 ++P.SJ;
956 }
957
958 // Back up P.EndOffset if we ended the span early when encountering an
959 // unsplittable slice. This synthesizes the early end offset of
960 // a partition spanning only splittable slices.
961 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
962 assert(!P.SJ->isSplittable());
963 P.EndOffset = P.SJ->beginOffset();
964 }
965 }
966
967public:
968 bool operator==(const partition_iterator &RHS) const {
969 assert(SE == RHS.SE &&
970 "End iterators don't match between compared partition iterators!");
971
972 // The observed positions of partitions are marked by the P.SI iterator and
973 // the emptiness of the split slices. The latter is only relevant when
974 // P.SI == SE, as the end iterator will additionally have an empty split
975 // slices list, but the prior may have the same P.SI and a tail of split
976 // slices.
977 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
978 assert(P.SJ == RHS.P.SJ &&
979 "Same set of slices formed two different sized partitions!");
980 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
981 "Same slice position with differently sized non-empty split "
982 "slice tails!");
983 return true;
984 }
985 return false;
986 }
987
988 partition_iterator &operator++() {
989 advance();
990 return *this;
991 }
992
993 Partition &operator*() { return P; }
994};
995
996/// A forward range over the partitions of the alloca's slices.
997///
998/// This accesses an iterator range over the partitions of the alloca's
999/// slices. It computes these partitions on the fly based on the overlapping
1000/// offsets of the slices and the ability to split them. It will visit "empty"
1001/// partitions to cover regions of the alloca only accessed via split
1002/// slices.
1003iterator_range<AllocaSlices::partition_iterator> AllocaSlices::partitions() {
1004 return make_range(partition_iterator(begin(), end()),
1005 partition_iterator(end(), end()));
1006}
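// Worked example (illustrative slice set): given a sorted slice list with an
// unsplittable slice [0,8) and a splittable slice [4,12), the first partition
// is [0,8) and contains both slices; the splittable slice's tail then carries
// over, producing a second, "empty" partition [8,12) whose only content is
// that split tail.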
1007
1008static Value *foldSelectInst(SelectInst &SI) {
1009 // If the condition being selected on is a constant or the same value is
1010 // being selected between, fold the select. Yes this does (rarely) happen
1011 // early on.
1012 if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
1013 return SI.getOperand(1 + CI->isZero());
1014 if (SI.getOperand(1) == SI.getOperand(2))
1015 return SI.getOperand(1);
1016
1017 return nullptr;
1018}
1019
1020/// A helper that folds a PHI node or a select.
1021static Value *foldPHINodeOrSelectInst(Instruction &I) {
1022 if (PHINode *PN = dyn_cast<PHINode>(&I)) {
1023 // If PN merges together the same value, return that value.
1024 return PN->hasConstantValue();
1025 }
1026 return foldSelectInst(cast<SelectInst>(I));
1027}
1028
1029/// Builder for the alloca slices.
1030///
1031/// This class builds a set of alloca slices by recursively visiting the uses
1032/// of an alloca and making a slice for each load and store at each offset.
1033class AllocaSlices::SliceBuilder : public PtrUseVisitor<SliceBuilder> {
1034 friend class PtrUseVisitor<SliceBuilder>;
1035 friend class InstVisitor<SliceBuilder>;
1036
1037 using Base = PtrUseVisitor<SliceBuilder>;
1038
1039 const uint64_t AllocSize;
1040 AllocaSlices &AS;
1041
1042 SmallDenseMap<Instruction *, unsigned> MemTransferSliceMap;
1043 SmallDenseMap<Instruction *, uint64_t> PHIOrSelectSizes;
1044
1045 /// Set to de-duplicate dead instructions found in the use walk.
1046 SmallPtrSet<Instruction *, 4> VisitedDeadInsts;
1047
1048 // When this access is via an llvm.protected.field.ptr intrinsic, contains
1049 // the second argument to the intrinsic, the discriminator.
1050 Value *ProtectedFieldDisc = nullptr;
1051
1052public:
1053 SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
1054 : PtrUseVisitor<SliceBuilder>(DL),
1055 AllocSize(AI.getAllocationSize(DL)->getFixedValue()), AS(AS) {}
1056
1057private:
1058 void markAsDead(Instruction &I) {
1059 if (VisitedDeadInsts.insert(&I).second)
1060 AS.DeadUsers.push_back(&I);
1061 }
1062
1063 void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
1064 bool IsSplittable = false) {
1065 // Completely skip uses which have a zero size or start either before or
1066 // past the end of the allocation.
1067 if (Size == 0 || Offset.uge(AllocSize)) {
1068 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
1069 << Offset
1070 << " which has zero size or starts outside of the "
1071 << AllocSize << " byte alloca:\n"
1072 << " alloca: " << AS.AI << "\n"
1073 << " use: " << I << "\n");
1074 return markAsDead(I);
1075 }
1076
1077 uint64_t BeginOffset = Offset.getZExtValue();
1078 uint64_t EndOffset = BeginOffset + Size;
1079
1080 // Clamp the end offset to the end of the allocation. Note that this is
1081 // formulated to handle even the case where "BeginOffset + Size" overflows.
1082 // This may appear superficially to be something we could ignore entirely,
1083 // but that is not so! There may be widened loads or PHI-node uses where
1084 // some instructions are dead but not others. We can't completely ignore
1085 // them, and so have to record at least the information here.
1086 assert(AllocSize >= BeginOffset); // Established above.
1087 if (Size > AllocSize - BeginOffset) {
1088 LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
1089 << Offset << " to remain within the " << AllocSize
1090 << " byte alloca:\n"
1091 << " alloca: " << AS.AI << "\n"
1092 << " use: " << I << "\n");
1093 EndOffset = AllocSize;
1094 }
1095
1096 AS.Slices.push_back(
1097 Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));
1098 }
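  // Worked example of the clamping above (illustrative sizes): for a 16-byte
  // alloca, an 8-byte use at offset 12 is kept but clamped to the slice
  // [12, 16), while an 8-byte use at offset 16 or beyond is dead and is only
  // marked as such.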
1099
1100 void visitBitCastInst(BitCastInst &BC) {
1101 if (BC.use_empty())
1102 return markAsDead(BC);
1103
1104 return Base::visitBitCastInst(BC);
1105 }
1106
1107 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1108 if (ASC.use_empty())
1109 return markAsDead(ASC);
1110
1111 return Base::visitAddrSpaceCastInst(ASC);
1112 }
1113
1114 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1115 if (GEPI.use_empty())
1116 return markAsDead(GEPI);
1117
1118 return Base::visitGetElementPtrInst(GEPI);
1119 }
1120
1121 void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
1122 uint64_t Size, bool IsVolatile) {
1123 // We allow splitting of non-volatile loads and stores where the type is an
1124 // integer type. These may be used to implement 'memcpy' or other "transfer
1125 // of bits" patterns.
1126 bool IsSplittable =
1127 Ty->isIntegerTy() && !IsVolatile && DL.typeSizeEqualsStoreSize(Ty);
1128
1129 insertUse(I, Offset, Size, IsSplittable);
1130 }
1131
1132 void visitLoadInst(LoadInst &LI) {
1133 assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
1134 "All simple FCA loads should have been pre-split");
1135
1136 // If there is a load with an unknown offset, we can still perform store
1137 // to load forwarding for other known-offset loads.
1138 if (!IsOffsetKnown)
1139 return PI.setEscapedReadOnly(&LI);
1140
1141 TypeSize Size = DL.getTypeStoreSize(LI.getType());
1142 if (Size.isScalable()) {
1143 unsigned VScale = LI.getFunction()->getVScaleValue();
1144 if (!VScale)
1145 return PI.setAborted(&LI);
1146
1147 Size = TypeSize::getFixed(Size.getKnownMinValue() * VScale);
1148 }
1149
1150 return handleLoadOrStore(LI.getType(), LI, Offset, Size.getFixedValue(),
1151 LI.isVolatile());
1152 }
1153
1154 void visitStoreInst(StoreInst &SI) {
1155 Value *ValOp = SI.getValueOperand();
1156 if (ValOp == *U)
1157 return PI.setEscapedAndAborted(&SI);
1158 if (!IsOffsetKnown)
1159 return PI.setAborted(&SI);
1160
1161 TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
1162 if (StoreSize.isScalable()) {
1163 unsigned VScale = SI.getFunction()->getVScaleValue();
1164 if (!VScale)
1165 return PI.setAborted(&SI);
1166
1167 StoreSize = TypeSize::getFixed(StoreSize.getKnownMinValue() * VScale);
1168 }
1169
1170 uint64_t Size = StoreSize.getFixedValue();
1171
1172 // If this memory access can be shown to *statically* extend outside the
1173 // bounds of the allocation, its behavior is undefined, so simply
1174 // ignore it. Note that this is more strict than the generic clamping
1175 // behavior of insertUse. We also try to handle cases which might run the
1176 // risk of overflow.
1177 // FIXME: We should instead consider the pointer to have escaped if this
1178 // function is being instrumented for addressing bugs or race conditions.
1179 if (Size > AllocSize || Offset.ugt(AllocSize - Size)) {
1180 LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte store @"
1181 << Offset << " which extends past the end of the "
1182 << AllocSize << " byte alloca:\n"
1183 << " alloca: " << AS.AI << "\n"
1184 << " use: " << SI << "\n");
1185 return markAsDead(SI);
1186 }
1187
1188 assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
1189 "All simple FCA stores should have been pre-split");
1190 handleLoadOrStore(ValOp->getType(), SI, Offset, Size, SI.isVolatile());
1191 }
1192
1193 void visitMemSetInst(MemSetInst &II) {
1194 assert(II.getRawDest() == *U && "Pointer use is not the destination?");
1195 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1196 if ((Length && Length->getValue() == 0) ||
1197 (IsOffsetKnown && Offset.uge(AllocSize)))
1198 // Zero-length mem transfer intrinsics can be ignored entirely.
1199 return markAsDead(II);
1200
1201 if (!IsOffsetKnown)
1202 return PI.setAborted(&II);
1203
1204 insertUse(II, Offset,
1205 Length ? Length->getLimitedValue()
1206 : AllocSize - Offset.getLimitedValue(),
1207 (bool)Length);
1208 }
1209
1210 void visitMemTransferInst(MemTransferInst &II) {
1211 ConstantInt *Length = dyn_cast<ConstantInt>(II.getLength());
1212 if (Length && Length->getValue() == 0)
1213 // Zero-length mem transfer intrinsics can be ignored entirely.
1214 return markAsDead(II);
1215
1216 // Because we can visit these intrinsics twice, also check to see if the
1217 // first time marked this instruction as dead. If so, skip it.
1218 if (VisitedDeadInsts.count(&II))
1219 return;
1220
1221 if (!IsOffsetKnown)
1222 return PI.setAborted(&II);
1223
1224 // This side of the transfer is completely out-of-bounds, and so we can
1225 // nuke the entire transfer. However, we also need to nuke the other side
1226 // if already added to our partitions.
1227 // FIXME: Yet another place we really should bypass this when
1228 // instrumenting for ASan.
1229 if (Offset.uge(AllocSize)) {
1230 SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
1231 MemTransferSliceMap.find(&II);
1232 if (MTPI != MemTransferSliceMap.end())
1233 AS.Slices[MTPI->second].kill();
1234 return markAsDead(II);
1235 }
1236
1237 uint64_t RawOffset = Offset.getLimitedValue();
1238 uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;
1239
1240 // Check for the special case where the same exact value is used for both
1241 // source and dest.
1242 if (*U == II.getRawDest() && *U == II.getRawSource()) {
1243 // For non-volatile transfers this is a no-op.
1244 if (!II.isVolatile())
1245 return markAsDead(II);
1246
1247 return insertUse(II, Offset, Size, /*IsSplittable=*/false);
1248 }
1249
1250 // If we have seen both source and destination for a mem transfer, then
1251 // they both point to the same alloca.
1252 bool Inserted;
1253 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1254 std::tie(MTPI, Inserted) =
1255 MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
1256 unsigned PrevIdx = MTPI->second;
1257 if (!Inserted) {
1258 Slice &PrevP = AS.Slices[PrevIdx];
1259
1260 // Check if the begin offsets match and this is a non-volatile transfer.
1261 // In that case, we can completely elide the transfer.
1262 if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1263 PrevP.kill();
1264 return markAsDead(II);
1265 }
1266
1267 // Otherwise we have an offset transfer within the same alloca. We can't
1268 // split those.
1269 PrevP.makeUnsplittable();
1270 }
1271
1272 // Insert the use now that we've fixed up the splittable nature.
1273 insertUse(II, Offset, Size, /*IsSplittable=*/Inserted && Length);
1274
1275 // Check that we ended up with a valid index in the map.
1276 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1277 "Map index doesn't point back to a slice with this user.");
1278 }
1279
1280 // Disable SRoA for any intrinsics except for lifetime invariants.
1281 // FIXME: What about debug intrinsics? This matches old behavior, but
1282 // doesn't make sense.
1283 void visitIntrinsicInst(IntrinsicInst &II) {
1284 if (II.isDroppable()) {
1285 AS.DeadUseIfPromotable.push_back(U);
1286 return;
1287 }
1288
1289 if (!IsOffsetKnown)
1290 return PI.setAborted(&II);
1291
1292 if (II.isLifetimeStartOrEnd()) {
1293 insertUse(II, Offset, AllocSize, true);
1294 return;
1295 }
1296
1297 if (II.getIntrinsicID() == Intrinsic::protected_field_ptr) {
1298 // We only handle loads and stores as users of llvm.protected.field.ptr.
1299 // Other uses may add items to the worklist, which will cause
1300 // ProtectedFieldDisc to be tracked incorrectly.
1301 AS.PFPUsers.push_back(&II);
1302 ProtectedFieldDisc = II.getArgOperand(1);
1303 for (Use &U : II.uses()) {
1304 this->U = &U;
1305 if (auto *LI = dyn_cast<LoadInst>(U.getUser()))
1306 visitLoadInst(*LI);
1307 else if (auto *SI = dyn_cast<StoreInst>(U.getUser()))
1308 visitStoreInst(*SI);
1309 else
1310 PI.setAborted(&II);
1311 if (PI.isAborted())
1312 break;
1313 }
1314 ProtectedFieldDisc = nullptr;
1315 return;
1316 }
1317
1318 Base::visitIntrinsicInst(II);
1319 }
1320
1321 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
1322 // We consider any PHI or select that results in a direct load or store of
1323 // the same offset to be a viable use for slicing purposes. These uses
1324 // are considered unsplittable and the size is the maximum loaded or stored
1325 // size.
1326 SmallPtrSet<Instruction *, 4> Visited;
1327 SmallVector<std::pair<Instruction *, Instruction *>, 4> Uses;
1328 Visited.insert(Root);
1329 Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));
1330 const DataLayout &DL = Root->getDataLayout();
1331 // If there are no loads or stores, the access is dead. We mark that as
1332 // a size zero access.
1333 Size = 0;
1334 do {
1335 Instruction *I, *UsedI;
1336 std::tie(UsedI, I) = Uses.pop_back_val();
1337
1338 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
1339 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
1340 if (LoadSize.isScalable()) {
1341 PI.setAborted(LI);
1342 return nullptr;
1343 }
1344 Size = std::max(Size, LoadSize.getFixedValue());
1345 continue;
1346 }
1347 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
1348 Value *Op = SI->getOperand(0);
1349 if (Op == UsedI)
1350 return SI;
1351 TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
1352 if (StoreSize.isScalable()) {
1353 PI.setAborted(SI);
1354 return nullptr;
1355 }
1356 Size = std::max(Size, StoreSize.getFixedValue());
1357 continue;
1358 }
1359
1360 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
1361 if (!GEP->hasAllZeroIndices())
1362 return GEP;
1363 } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
1364 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
1365 return I;
1366 }
1367
1368 for (User *U : I->users())
1369 if (Visited.insert(cast<Instruction>(U)).second)
1370 Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
1371 } while (!Uses.empty());
1372
1373 return nullptr;
1374 }
1375
1376 void visitPHINodeOrSelectInst(Instruction &I) {
1377 assert(isa<PHINode>(I) || isa<SelectInst>(I));
1378 if (I.use_empty())
1379 return markAsDead(I);
1380
1381 // If this is a PHI node before a catchswitch, we cannot insert any non-PHI
1382 // instructions in this BB, which may be required during rewriting. Bail out
1383 // on these cases.
1384 if (isa<PHINode>(I) && !I.getParent()->hasInsertionPt())
1385 return PI.setAborted(&I);
1386
1387 // TODO: We could use simplifyInstruction here to fold PHINodes and
1388 // SelectInsts. However, doing so requires to change the current
1389 // dead-operand-tracking mechanism. For instance, suppose neither loading
1390 // from %U nor %other traps. Then "load (select undef, %U, %other)" does not
1391 // trap either. However, if we simply replace %U with undef using the
1392 // current dead-operand-tracking mechanism, "load (select undef, undef,
1393 // %other)" may trap because the select may return the first operand
1394 // "undef".
1395 if (Value *Result = foldPHINodeOrSelectInst(I)) {
1396 if (Result == *U)
1397 // If the result of the constant fold will be the pointer, recurse
1398 // through the PHI/select as if we had RAUW'ed it.
1399 enqueueUsers(I);
1400 else
1401 // Otherwise the operand to the PHI/select is dead, and we can replace
1402 // it with poison.
1403 AS.DeadOperands.push_back(U);
1404
1405 return;
1406 }
1407
1408 if (!IsOffsetKnown)
1409 return PI.setAborted(&I);
1410
1411 // See if we already have computed info on this node.
1412 uint64_t &Size = PHIOrSelectSizes[&I];
1413 if (!Size) {
1414 // This is a new PHI/Select, check for an unsafe use of it.
1415 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
1416 return PI.setAborted(UnsafeI);
1417 }
1418
1419 // For PHI and select operands outside the alloca, we can't nuke the entire
1420 // phi or select -- the other side might still be relevant, so we special
1421 // case them here and use a separate structure to track the operands
1422 // themselves which should be replaced with poison.
1423 // FIXME: This should instead be escaped in the event we're instrumenting
1424 // for address sanitization.
1425 if (Offset.uge(AllocSize)) {
1426 AS.DeadOperands.push_back(U);
1427 return;
1428 }
1429
1430 insertUse(I, Offset, Size);
1431 }
1432
1433 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1434
1435 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1436
1437 /// Disable SROA entirely if there are unhandled users of the alloca.
1438 void visitInstruction(Instruction &I) { PI.setAborted(&I); }
1439
1440 void visitCallBase(CallBase &CB) {
1441 // If the call operand is read-only and only does a read-only or address
1442 // capture, then we mark it as EscapedReadOnly.
1443 if (CB.isDataOperand(U) &&
1444 !capturesFullProvenance(CB.getCaptureInfo(U->getOperandNo())) &&
1445 CB.onlyReadsMemory(U->getOperandNo())) {
1446 PI.setEscapedReadOnly(&CB);
1447 return;
1448 }
1449
1450 Base::visitCallBase(CB);
1451 }
1452};
1453
1454AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
1455 :
1456#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1457 AI(AI),
1458#endif
1459 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1460 SliceBuilder PB(DL, AI, *this);
1461 SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
1462 if (PtrI.isEscaped() || PtrI.isAborted()) {
1463 // FIXME: We should sink the escape vs. abort info into the caller nicely,
1464 // possibly by just storing the PtrInfo in the AllocaSlices.
1465 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1466 : PtrI.getAbortingInst();
1467 assert(PointerEscapingInstr && "Did not track a bad instruction");
1468 return;
1469 }
1470 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1471
1472 llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
1473
1474 // Sort the uses. This arranges for the offsets to be in ascending order,
1475 // and the sizes to be in descending order.
1476 llvm::stable_sort(Slices);
1477}
1478
1479#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1480
1481void AllocaSlices::print(raw_ostream &OS, const_iterator I,
1482 StringRef Indent) const {
1483 printSlice(OS, I, Indent);
1484 OS << "\n";
1485 printUse(OS, I, Indent);
1486}
1487
1488void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
1489 StringRef Indent) const {
1490 OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
1491 << " slice #" << (I - begin())
1492 << (I->isSplittable() ? " (splittable)" : "");
1493}
1494
1495void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
1496 StringRef Indent) const {
1497 OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
1498}
1499
1500void AllocaSlices::print(raw_ostream &OS) const {
1501 if (PointerEscapingInstr) {
1502 OS << "Can't analyze slices for alloca: " << AI << "\n"
1503 << " A pointer to this alloca escaped by:\n"
1504 << " " << *PointerEscapingInstr << "\n";
1505 return;
1506 }
1507
1508 if (PointerEscapingInstrReadOnly)
1509 OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";
1510
1511 OS << "Slices of alloca: " << AI << "\n";
1512 for (const_iterator I = begin(), E = end(); I != E; ++I)
1513 print(OS, I);
1514}
1515
1516LLVM_DUMP_METHOD void AllocaSlices::dump(const_iterator I) const {
1517 print(dbgs(), I);
1518}
1519LLVM_DUMP_METHOD void AllocaSlices::dump() const { print(dbgs()); }
1520
1521#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1522
1523/// Walk the range of a partitioning looking for a common type to cover this
1524/// sequence of slices.
1525static std::pair<Type *, IntegerType *>
1526findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
1527 uint64_t EndOffset) {
1528 Type *Ty = nullptr;
1529 bool TyIsCommon = true;
1530 IntegerType *ITy = nullptr;
1531
1532 // Note that we need to look at *every* alloca slice's Use to ensure we
1533 // always get consistent results regardless of the order of slices.
1534 for (AllocaSlices::const_iterator I = B; I != E; ++I) {
1535 Use *U = I->getUse();
1536 if (isa<IntrinsicInst>(*U->getUser()))
1537 continue;
1538 if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
1539 continue;
1540
1541 Type *UserTy = nullptr;
1542 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1543 UserTy = LI->getType();
1544 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1545 UserTy = SI->getValueOperand()->getType();
1546 }
1547
1548 if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1549 // If the type is larger than the partition, skip it. We only encounter
1550 // this for split integer operations where we want to use the type of the
1551 // entity causing the split. Also skip if the type is not a byte width
1552 // multiple.
1553 if (UserITy->getBitWidth() % 8 != 0 ||
1554 UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
1555 continue;
1556
1557 // Track the largest bitwidth integer type used in this way in case there
1558 // is no common type.
1559 if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
1560 ITy = UserITy;
1561 }
1562
1563 // To avoid depending on the order of slices, Ty and TyIsCommon must not
1564 // depend on types skipped above.
1565 if (!UserTy || (Ty && Ty != UserTy))
1566 TyIsCommon = false; // Give up on anything but an iN type.
1567 else
1568 Ty = UserTy;
1569 }
1570
1571 return {TyIsCommon ? Ty : nullptr, ITy};
1572}
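// For illustration only (hypothetical IR, not taken from a test): given a
// partition covering bytes [0,8) whose slices are used by
//   %v = load i64, ptr %p
//   store double %d, ptr %p
// the loop above sees two different user types, so TyIsCommon becomes false
// and the function returns {nullptr, i64}: there is no common type, but i64 is
// the widest byte-width integer user and can still drive integer widening.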
1573
1574/// PHI instructions that use an alloca and are subsequently loaded can be
1575/// rewritten to load both input pointers in the pred blocks and then PHI the
1576/// results, allowing the load of the alloca to be promoted.
1577/// From this:
1578/// %P2 = phi [i32* %Alloca, i32* %Other]
1579/// %V = load i32* %P2
1580/// to:
1581/// %V1 = load i32* %Alloca -> will be mem2reg'd
1582/// ...
1583/// %V2 = load i32* %Other
1584/// ...
1585/// %V = phi [i32 %V1, i32 %V2]
1586///
1587/// We can do this to a select if its only uses are loads and if the operands
1588/// to the select can be loaded unconditionally.
1589///
1590/// FIXME: This should be hoisted into a generic utility, likely in
1591/// Transforms/Util/Local.h
1592static bool isSafePHIToSpeculate(PHINode &PN) {
1593 const DataLayout &DL = PN.getDataLayout();
1594
1595 // For now, we can only do this promotion if the load is in the same block
1596 // as the PHI, and if there are no stores between the phi and load.
1597 // TODO: Allow recursive phi users.
1598 // TODO: Allow stores.
1599 BasicBlock *BB = PN.getParent();
1600 Align MaxAlign;
1601 uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
1602 Type *LoadType = nullptr;
1603 for (User *U : PN.users()) {
1604 LoadInst *LI = dyn_cast<LoadInst>(U);
1605 if (!LI || !LI->isSimple())
1606 return false;
1607
1608 // For now we only allow loads in the same block as the PHI. This is
1609 // a common case that happens when instcombine merges two loads through
1610 // a PHI.
1611 if (LI->getParent() != BB)
1612 return false;
1613
1614 if (LoadType) {
1615 if (LoadType != LI->getType())
1616 return false;
1617 } else {
1618 LoadType = LI->getType();
1619 }
1620
1621 // Ensure that there are no instructions between the PHI and the load that
1622 // could store.
1623 for (BasicBlock::iterator BBI(PN); &*BBI != LI; ++BBI)
1624 if (BBI->mayWriteToMemory())
1625 return false;
1626
1627 MaxAlign = std::max(MaxAlign, LI->getAlign());
1628 }
1629
1630 if (!LoadType)
1631 return false;
1632
1633 APInt LoadSize =
1634 APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
1635
1636 // We can only transform this if it is safe to push the loads into the
1637 // predecessor blocks. The only thing to watch out for is that we can't put
1638 // a possibly trapping load in the predecessor if it is a critical edge.
1639 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1640 Instruction *TI = PN.getIncomingBlock(Idx)->getTerminator();
1641 Value *InVal = PN.getIncomingValue(Idx);
1642
1643 // If the value is produced by the terminator of the predecessor (an
1644 // invoke) or it has side-effects, there is no valid place to put a load
1645 // in the predecessor.
1646 if (TI == InVal || TI->mayHaveSideEffects())
1647 return false;
1648
1649 // If the predecessor has a single successor, then the edge isn't
1650 // critical.
1651 if (TI->getNumSuccessors() == 1)
1652 continue;
1653
1654 // If this pointer is always safe to load, or if we can prove that there
1655 // is already a load in the block, then we can move the load to the pred
1656 // block.
1657 if (isSafeToLoadUnconditionally(InVal, MaxAlign, LoadSize, DL, TI))
1658 continue;
1659
1660 return false;
1661 }
1662
1663 return true;
1664}
1665
1666static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN) {
1667 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
1668
1669 LoadInst *SomeLoad = cast<LoadInst>(PN.user_back());
1670 Type *LoadTy = SomeLoad->getType();
1671 IRB.SetInsertPoint(&PN);
1672 PHINode *NewPN = IRB.CreatePHI(LoadTy, PN.getNumIncomingValues(),
1673 PN.getName() + ".sroa.speculated");
1674
1675 // Get the AA tags and alignment to use from one of the loads. It does not
1676 // matter which one we pick or whether any of them differ.
1677 AAMDNodes AATags = SomeLoad->getAAMetadata();
1678 Align Alignment = SomeLoad->getAlign();
1679
1680 // Rewrite all loads of the PN to use the new PHI.
1681 while (!PN.use_empty()) {
1682 LoadInst *LI = cast<LoadInst>(PN.user_back());
1683 LI->replaceAllUsesWith(NewPN);
1684 LI->eraseFromParent();
1685 }
1686
1687 // Inject loads into all of the pred blocks.
1688 DenseMap<BasicBlock *, Value *> InjectedLoads;
1689 for (unsigned Idx = 0, Num = PN.getNumIncomingValues(); Idx != Num; ++Idx) {
1690 BasicBlock *Pred = PN.getIncomingBlock(Idx);
1691 Value *InVal = PN.getIncomingValue(Idx);
1692
1693 // A PHI node is allowed to have multiple (duplicated) entries for the same
1694 // basic block, as long as the value is the same. So if we already injected
1695 // a load in the predecessor, then we should reuse the same load for all
1696 // duplicated entries.
1697 if (Value *V = InjectedLoads.lookup(Pred)) {
1698 NewPN->addIncoming(V, Pred);
1699 continue;
1700 }
1701
1702 Instruction *TI = Pred->getTerminator();
1703 IRB.SetInsertPoint(TI);
1704
1705 LoadInst *Load = IRB.CreateAlignedLoad(
1706 LoadTy, InVal, Alignment,
1707 (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
1708 ++NumLoadsSpeculated;
1709 if (AATags)
1710 Load->setAAMetadata(AATags);
1711 NewPN->addIncoming(Load, Pred);
1712 InjectedLoads[Pred] = Load;
1713 }
1714
1715 LLVM_DEBUG(dbgs() << " speculated to: " << *NewPN << "\n");
1716 PN.eraseFromParent();
1717}
1718
1719SelectHandSpeculativity &
1720SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
1721 if (isTrueVal)
1722 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage, true);
1723 else
1724 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage, true);
1725 return *this;
1726}
1727
1728bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
1729 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1730 : Bitfield::get<SelectHandSpeculativity::FalseVal>(Storage);
1731}
1732
1733bool SelectHandSpeculativity::areAllSpeculatable() const {
1734 return isSpeculatable(/*isTrueVal=*/true) &&
1735 isSpeculatable(/*isTrueVal=*/false);
1736}
1737
1738bool SelectHandSpeculativity::areAnySpeculatable() const {
1739 return isSpeculatable(/*isTrueVal=*/true) ||
1740 isSpeculatable(/*isTrueVal=*/false);
1741}
1742bool SelectHandSpeculativity::areNoneSpeculatable() const {
1743 return !areAnySpeculatable();
1744}
1745
1746static SelectHandSpeculativity
1747isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
1748 assert(LI.isSimple() && "Only for simple loads");
1749 SelectHandSpeculativity Spec;
1750
1751 const DataLayout &DL = SI.getDataLayout();
1752 for (Value *Value : {SI.getTrueValue(), SI.getFalseValue()})
1753 if (isSafeToLoadUnconditionally(Value, LI.getType(), LI.getAlign(), DL,
1754 &LI))
1755 Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
1756 else if (PreserveCFG)
1757 return Spec;
1758
1759 return Spec;
1760}
1761
1762std::optional<RewriteableMemOps>
1763SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
1764 RewriteableMemOps Ops;
1765
1766 for (User *U : SI.users()) {
1767 if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())
1768 U = *BC->user_begin();
1769
1770 if (auto *Store = dyn_cast<StoreInst>(U)) {
1771 // Note that atomic stores can be transformed; atomic semantics do not
1772 // have any meaning for a local alloca. Stores are not speculatable,
1773 // however, so if we can't turn it into a predicated store, we are done.
1774 if (Store->isVolatile() || PreserveCFG)
1775 return {}; // Give up on this `select`.
1776 Ops.emplace_back(Store);
1777 continue;
1778 }
1779
1780 auto *LI = dyn_cast<LoadInst>(U);
1781
1782 // Note that atomic loads can be transformed;
1783 // atomic semantics do not have any meaning for a local alloca.
1784 if (!LI || LI->isVolatile())
1785 return {}; // Give up on this `select`.
1786
1787 PossiblySpeculatableLoad Load(LI);
1788 if (!LI->isSimple()) {
1789 // If the `load` is not simple, we can't speculatively execute it,
1790 // but we could handle this via a CFG modification. But can we?
1791 if (PreserveCFG)
1792 return {}; // Give up on this `select`.
1793 Ops.emplace_back(Load);
1794 continue;
1795 }
1796
1797 SelectHandSpeculativity Spec =
1798 isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
1799 if (PreserveCFG && !Spec.areAllSpeculatable())
1800 return {}; // Give up on this `select`.
1801
1802 Load.setInt(Spec);
1803 Ops.emplace_back(Load);
1804 }
1805
1806 return Ops;
1807}
1808
1809static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
1810 IRBuilderTy &IRB) {
1811 LLVM_DEBUG(dbgs() << " original load: " << SI << "\n");
1812
1813 Value *TV = SI.getTrueValue();
1814 Value *FV = SI.getFalseValue();
1815 // Replace the given load of the select with a select of two loads.
1816
1817 assert(LI.isSimple() && "We only speculate simple loads");
1818
1819 IRB.SetInsertPoint(&LI);
1820
1821 LoadInst *TL =
1822 IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
1823 LI.getName() + ".sroa.speculate.load.true");
1824 LoadInst *FL =
1825 IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
1826 LI.getName() + ".sroa.speculate.load.false");
1827 NumLoadsSpeculated += 2;
1828
1829 // Transfer alignment and AA info if present.
1830 TL->setAlignment(LI.getAlign());
1831 FL->setAlignment(LI.getAlign());
1832
1833 AAMDNodes Tags = LI.getAAMetadata();
1834 if (Tags) {
1835 TL->setAAMetadata(Tags);
1836 FL->setAAMetadata(Tags);
1837 }
1838
1839 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1840 LI.getName() + ".sroa.speculated",
1841 ProfcheckDisableMetadataFixes ? nullptr : &SI);
1842
1843 LLVM_DEBUG(dbgs() << " speculated to: " << *V << "\n");
1844 LI.replaceAllUsesWith(V);
1845}
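// For illustration only (hypothetical IR): a fully speculatable use such as
//   %p = select i1 %c, ptr %a, ptr %b
//   %v = load i32, ptr %p
// is rewritten by speculateSelectInstLoads above into roughly
//   %v.sroa.speculate.load.true = load i32, ptr %a
//   %v.sroa.speculate.load.false = load i32, ptr %b
//   %v.sroa.speculated = select i1 %c, i32 %v.sroa.speculate.load.true,
//                                      i32 %v.sroa.speculate.load.false
// after which the original load of the select is dead.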
1846
1847template <typename T>
1848static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
1849 SelectHandSpeculativity Spec,
1850 DomTreeUpdater &DTU) {
1851 assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");
1852 LLVM_DEBUG(dbgs() << " original mem op: " << I << "\n");
1853 BasicBlock *Head = I.getParent();
1854 Instruction *ThenTerm = nullptr;
1855 Instruction *ElseTerm = nullptr;
1856 if (Spec.areNoneSpeculatable())
1857 SplitBlockAndInsertIfThenElse(SI.getCondition(), &I, &ThenTerm, &ElseTerm,
1858 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1859 else {
1860 SplitBlockAndInsertIfThen(SI.getCondition(), &I, /*Unreachable=*/false,
1861 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1862 /*LI=*/nullptr, /*ThenBlock=*/nullptr);
1863 if (Spec.isSpeculatable(/*isTrueVal=*/true))
1864 cast<BranchInst>(Head->getTerminator())->swapSuccessors();
1865 }
1866 auto *HeadBI = cast<BranchInst>(Head->getTerminator());
1867 Spec = {}; // Do not use `Spec` beyond this point.
1868 BasicBlock *Tail = I.getParent();
1869 Tail->setName(Head->getName() + ".cont");
1870 PHINode *PN;
1871 if (isa<LoadInst>(I))
1872 PN = PHINode::Create(I.getType(), 2, "", I.getIterator());
1873 for (BasicBlock *SuccBB : successors(Head)) {
1874 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1875 int SuccIdx = IsThen ? 0 : 1;
1876 auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
1877 auto &CondMemOp = cast<T>(*I.clone());
1878 if (NewMemOpBB != Head) {
1879 NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
1880 if (isa<LoadInst>(I))
1881 ++NumLoadsPredicated;
1882 else
1883 ++NumStoresPredicated;
1884 } else {
1885 CondMemOp.dropUBImplyingAttrsAndMetadata();
1886 ++NumLoadsSpeculated;
1887 }
1888 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1889 Value *Ptr = SI.getOperand(1 + SuccIdx);
1890 CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
1891 if (isa<LoadInst>(I)) {
1892 CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
1893 PN->addIncoming(&CondMemOp, NewMemOpBB);
1894 } else
1895 LLVM_DEBUG(dbgs() << " to: " << CondMemOp << "\n");
1896 }
1897 if (isa<LoadInst>(I)) {
1898 PN->takeName(&I);
1899 LLVM_DEBUG(dbgs() << " to: " << *PN << "\n");
1900 I.replaceAllUsesWith(PN);
1901 }
1902}
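// For illustration only (hypothetical IR): when neither hand is speculatable,
// a store through the select
//   %p = select i1 %c, ptr %a, ptr %b
//   store i32 %v, ptr %p
// is rewritten by the function above into a predicated form, roughly
//   br i1 %c, label %head.then, label %head.else
// head.then:
//   store i32 %v, ptr %a
//   br label %head.cont
// head.else:
//   store i32 %v, ptr %b
//   br label %head.cont
// For a load, a phi in %head.cont merges the two predicated load results.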
1903
1904static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
1905 SelectHandSpeculativity Spec,
1906 DomTreeUpdater &DTU) {
1907 if (auto *LI = dyn_cast<LoadInst>(&I))
1908 rewriteMemOpOfSelect(SelInst, *LI, Spec, DTU);
1909 else if (auto *SI = dyn_cast<StoreInst>(&I))
1910 rewriteMemOpOfSelect(SelInst, *SI, Spec, DTU);
1911 else
1912 llvm_unreachable_internal("Only for load and store.");
1913}
1914
1915static bool rewriteSelectInstMemOps(SelectInst &SI,
1916 const RewriteableMemOps &Ops,
1917 IRBuilderTy &IRB, DomTreeUpdater *DTU) {
1918 bool CFGChanged = false;
1919 LLVM_DEBUG(dbgs() << " original select: " << SI << "\n");
1920
1921 for (const RewriteableMemOp &Op : Ops) {
1922 SelectHandSpeculativity Spec;
1923 Instruction *I;
1924 if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
1925 I = *US;
1926 } else {
1927 auto PSL = std::get<PossiblySpeculatableLoad>(Op);
1928 I = PSL.getPointer();
1929 Spec = PSL.getInt();
1930 }
1931 if (Spec.areAllSpeculatable()) {
1932 speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
1933 } else {
1934 assert(DTU && "Should not get here when not allowed to modify the CFG!");
1935 rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
1936 CFGChanged = true;
1937 }
1938 I->eraseFromParent();
1939 }
1940
1941 for (User *U : make_early_inc_range(SI.users()))
1942 cast<BitCastInst>(U)->eraseFromParent();
1943 SI.eraseFromParent();
1944 return CFGChanged;
1945}
1946
1947/// Compute an adjusted pointer from Ptr by Offset bytes where the
1948/// resulting pointer has PointerTy.
1949static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr,
1950 APInt Offset, Type *PointerTy,
1951 const Twine &NamePrefix) {
1952 if (Offset != 0)
1953 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
1954 NamePrefix + "sroa_idx");
1955 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
1956 NamePrefix + "sroa_cast");
1957}
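// For illustration only (hypothetical names, assuming a 64-bit index type):
// getAdjustedPtr with Offset = 8 and NamePrefix = "x." emits
//   %x.sroa_idx = getelementptr inbounds i8, ptr %p, i64 8
// and the trailing cast only materializes (as an addrspacecast) when PointerTy
// lives in a different address space; otherwise it folds away.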
1958
1959/// Compute the adjusted alignment for a load or store from an offset.
1960static Align getAdjustedAlignment(Instruction *I, uint64_t Offset) {
1961 return commonAlignment(getLoadStoreAlignment(I), Offset);
1962}
1963
1964/// Test whether we can convert a value from the old to the new type.
1965///
1966/// This predicate should be used to guard calls to convertValue in order to
1967/// ensure that we only try to convert viable values. The strategy is that we
1968/// will peel off single element struct and array wrappings to get to an
1969/// underlying value, and convert that value.
1970static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
1971 unsigned VScale = 0) {
1972 if (OldTy == NewTy)
1973 return true;
1974
1975 // For integer types, we can't handle any bit-width differences. This would
1976 // break both vector conversions with extension and introduce endianness
1977 // issues when in conjunction with loads and stores.
1978 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1979 assert(cast<IntegerType>(OldTy)->getBitWidth() !=
1980 cast<IntegerType>(NewTy)->getBitWidth() &&
1981 "We can't have the same bitwidth for different int types");
1982 return false;
1983 }
1984
1985 TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
1986 TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
1987
1988 if ((isa<ScalableVectorType>(NewTy) && isa<FixedVectorType>(OldTy)) ||
1989 (isa<ScalableVectorType>(OldTy) && isa<FixedVectorType>(NewTy))) {
1990 // Conversion is only possible when the size of scalable vectors is known.
1991 if (!VScale)
1992 return false;
1993
1994 // For ptr-to-int and int-to-ptr casts, the pointer side is resolved within
1995 // a single domain (either fixed or scalable). Any additional conversion
1996 // between fixed and scalable types is handled through integer types.
1997 auto OldVTy = OldTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(OldTy) : OldTy;
1998 auto NewVTy = NewTy->isPtrOrPtrVectorTy() ? DL.getIntPtrType(NewTy) : NewTy;
1999
2000 if (isa<ScalableVectorType>(NewTy)) {
2002 return false;
2003
2004 NewSize = TypeSize::getFixed(NewSize.getKnownMinValue() * VScale);
2005 } else {
2007 return false;
2008
2009 OldSize = TypeSize::getFixed(OldSize.getKnownMinValue() * VScale);
2010 }
2011 }
2012
2013 if (NewSize != OldSize)
2014 return false;
2015 if (!NewTy->isSingleValueType() || !OldTy->isSingleValueType())
2016 return false;
2017
2018 // We can convert pointers to integers and vice-versa. Same for vectors
2019 // of pointers and integers.
2020 OldTy = OldTy->getScalarType();
2021 NewTy = NewTy->getScalarType();
2022 if (NewTy->isPointerTy() || OldTy->isPointerTy()) {
2023 if (NewTy->isPointerTy() && OldTy->isPointerTy()) {
2024 unsigned OldAS = OldTy->getPointerAddressSpace();
2025 unsigned NewAS = NewTy->getPointerAddressSpace();
2026 // Convert pointers if they are pointers from the same address space or
2027 // different integral (not non-integral) address spaces with the same
2028 // pointer size.
2029 return OldAS == NewAS ||
2030 (!DL.isNonIntegralAddressSpace(OldAS) &&
2031 !DL.isNonIntegralAddressSpace(NewAS) &&
2032 DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2033 }
2034
2035 // We can convert integers to integral pointers, but not to non-integral
2036 // pointers.
2037 if (OldTy->isIntegerTy())
2038 return !DL.isNonIntegralPointerType(NewTy);
2039
2040 // We can convert integral pointers to integers, but non-integral pointers
2041 // need to remain pointers.
2042 if (!DL.isNonIntegralPointerType(OldTy))
2043 return NewTy->isIntegerTy();
2044
2045 return false;
2046 }
2047
2048 if (OldTy->isTargetExtTy() || NewTy->isTargetExtTy())
2049 return false;
2050
2051 return true;
2052}
2053
2054/// Generic routine to convert an SSA value to a value of a different
2055/// type.
2056///
2057/// This will try various different casting techniques, such as bitcasts,
2058/// inttoptr, and ptrtoint casts. Use the \c canConvertValue predicate to test
2059/// two types for viability with this routine.
2060static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2061 Type *NewTy) {
2062 Type *OldTy = V->getType();
2063
2064#ifndef NDEBUG
2065 BasicBlock *BB = IRB.GetInsertBlock();
2066 assert(BB && BB->getParent() && "VScale unknown!");
2067 unsigned VScale = BB->getParent()->getVScaleValue();
2068 assert(canConvertValue(DL, OldTy, NewTy, VScale) &&
2069 "Value not convertable to type");
2070#endif
2071
2072 if (OldTy == NewTy)
2073 return V;
2074
2075 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
2076 "Integer types must be the exact same to convert.");
2077
2078 // A variant of bitcast that supports a mixture of fixed and scalable types
2079 // that are known to have the same size.
2080 auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
2081 Type *InTy = In->getType();
2082 if (InTy == Ty)
2083 return In;
2084
2086 // For vscale_range(2) expand <4 x i32> to <vscale x 4 x i16> -->
2087 // <4 x i32> to <vscale x 2 x i32> to <vscale x 4 x i16>
2089 return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
2090 PoisonValue::get(VTy), In,
2091 IRB.getInt64(0)),
2092 Ty);
2093 }
2094
2096 // For vscale_range(2) expand <vscale x 4 x i16> to <4 x i32> -->
2097 // <vscale x 4 x i16> to <vscale x 2 x i32> to <4 x i32>
2099 return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
2100 IRB.getInt64(0));
2101 }
2102
2103 return IRB.CreateBitCast(In, Ty);
2104 };
2105
2106 // See if we need inttoptr for this type pair. May require additional bitcast.
2107 if (OldTy->isIntOrIntVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2108 // Expand <2 x i32> to i8* --> <2 x i32> to i64 to i8*
2109 // Expand i128 to <2 x i8*> --> i128 to <2 x i64> to <2 x i8*>
2110 // Expand <4 x i32> to <2 x i8*> --> <4 x i32> to <2 x i64> to <2 x i8*>
2111 // Directly handle i64 to i8*
2112 return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
2113 NewTy);
2114 }
2115
2116 // See if we need ptrtoint for this type pair. May require additional bitcast.
2117 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isIntOrIntVectorTy()) {
2118 // Expand <2 x i8*> to i128 --> <2 x i8*> to <2 x i64> to i128
2119 // Expand i8* to <2 x i32> --> i8* to i64 to <2 x i32>
2120 // Expand <2 x i8*> to <4 x i32> --> <2 x i8*> to <2 x i64> to <4 x i32>
2121 // Expand i8* to i64 --> i8* to i64 to i64
2122 return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2123 NewTy);
2124 }
2125
2126 if (OldTy->isPtrOrPtrVectorTy() && NewTy->isPtrOrPtrVectorTy()) {
2127 unsigned OldAS = OldTy->getPointerAddressSpace();
2128 unsigned NewAS = NewTy->getPointerAddressSpace();
2129 // To convert pointers with different address spaces (which were already
2130 // checked to be convertible, i.e. they have the same pointer size), we
2131 // cannot use `bitcast` (which requires the same address space) or
2132 // `addrspacecast` (which is not always a no-op cast). Instead, use a pair
2133 // of no-op `ptrtoint`/`inttoptr` casts through an integer with the same bit
2134 // width.
2135 if (OldAS != NewAS) {
2136 assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
2137 return IRB.CreateIntToPtr(
2138 CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
2139 DL.getIntPtrType(NewTy)),
2140 NewTy);
2141 }
2142 }
2143
2144 return CreateBitCastLike(V, NewTy);
2145}
2146
2147/// Test whether the given slice use can be promoted to a vector.
2148///
2149/// This function is called to test each entry in a partition which is slated
2150/// for a single slice.
2151static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S,
2152 VectorType *Ty,
2153 uint64_t ElementSize,
2154 const DataLayout &DL,
2155 unsigned VScale) {
2156 // First validate the slice offsets.
2157 uint64_t BeginOffset =
2158 std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
2159 uint64_t BeginIndex = BeginOffset / ElementSize;
2160 if (BeginIndex * ElementSize != BeginOffset ||
2161 BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
2162 return false;
2163 uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
2164 uint64_t EndIndex = EndOffset / ElementSize;
2165 if (EndIndex * ElementSize != EndOffset ||
2166 EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
2167 return false;
2168
2169 assert(EndIndex > BeginIndex && "Empty vector!");
2170 uint64_t NumElements = EndIndex - BeginIndex;
2171 Type *SliceTy = (NumElements == 1)
2172 ? Ty->getElementType()
2173 : FixedVectorType::get(Ty->getElementType(), NumElements);
2174
2175 Type *SplitIntTy =
2176 Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);
2177
2178 Use *U = S.getUse();
2179
2180 if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2181 if (MI->isVolatile())
2182 return false;
2183 if (!S.isSplittable())
2184 return false; // Skip any unsplittable intrinsics.
2185 } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2186 if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
2187 return false;
2188 } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2189 if (LI->isVolatile())
2190 return false;
2191 Type *LTy = LI->getType();
2192 // Disable vector promotion when there are loads or stores of an FCA.
2193 if (LTy->isStructTy())
2194 return false;
2195 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2196 assert(LTy->isIntegerTy());
2197 LTy = SplitIntTy;
2198 }
2199 if (!canConvertValue(DL, SliceTy, LTy, VScale))
2200 return false;
2201 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2202 if (SI->isVolatile())
2203 return false;
2204 Type *STy = SI->getValueOperand()->getType();
2205 // Disable vector promotion when there are loads or stores of an FCA.
2206 if (STy->isStructTy())
2207 return false;
2208 if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
2209 assert(STy->isIntegerTy());
2210 STy = SplitIntTy;
2211 }
2212 if (!canConvertValue(DL, STy, SliceTy, VScale))
2213 return false;
2214 } else {
2215 return false;
2216 }
2217
2218 return true;
2219}
2220
2221/// Test whether any vector type in \p CandidateTys is viable for promotion.
2222///
2223/// This implements the necessary checking for \c isVectorPromotionViable over
2224/// all slices of the alloca for the given VectorType.
2225static VectorType *
2227 SmallVectorImpl<VectorType *> &CandidateTys,
2228 bool HaveCommonEltTy, Type *CommonEltTy,
2229 bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
2230 VectorType *CommonVecPtrTy, unsigned VScale) {
2231 // If we didn't find a vector type, nothing to do here.
2232 if (CandidateTys.empty())
2233 return nullptr;
2234
2235 // Pointer-ness is sticky: if we had a vector-of-pointers candidate type,
2236 // then we should choose it, not some other alternative.
2237 // But, we can't perform a no-op pointer address space change via bitcast,
2238 // so if we didn't have a common pointer element type, bail.
2239 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2240 return nullptr;
2241
2242 // Try to pick the "best" element type out of the choices.
2243 if (!HaveCommonEltTy && HaveVecPtrTy) {
2244 // If there was a pointer element type, there's really only one choice.
2245 CandidateTys.clear();
2246 CandidateTys.push_back(CommonVecPtrTy);
2247 } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2248 // Integer-ify vector types.
2249 for (VectorType *&VTy : CandidateTys) {
2250 if (!VTy->getElementType()->isIntegerTy())
2251 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2252 VTy->getContext(), VTy->getScalarSizeInBits())));
2253 }
2254
2255 // Rank the remaining candidate vector types. This is easy because we know
2256 // they're all integer vectors. We sort by ascending number of elements.
2257 auto RankVectorTypesComp = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2258 (void)DL;
2259 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2260 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2261 "Cannot have vector types of different sizes!");
2262 assert(RHSTy->getElementType()->isIntegerTy() &&
2263 "All non-integer types eliminated!");
2264 assert(LHSTy->getElementType()->isIntegerTy() &&
2265 "All non-integer types eliminated!");
2266 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2267 cast<FixedVectorType>(LHSTy)->getNumElements();
2268 };
2269 auto RankVectorTypesEq = [&DL](VectorType *RHSTy, VectorType *LHSTy) {
2270 (void)DL;
2271 assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2272 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2273 "Cannot have vector types of different sizes!");
2274 assert(RHSTy->getElementType()->isIntegerTy() &&
2275 "All non-integer types eliminated!");
2276 assert(LHSTy->getElementType()->isIntegerTy() &&
2277 "All non-integer types eliminated!");
2278 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2279 cast<FixedVectorType>(LHSTy)->getNumElements();
2280 };
2281 llvm::sort(CandidateTys, RankVectorTypesComp);
2282 CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
2283 CandidateTys.end());
2284 } else {
2285// The only way to have the same element type in every vector type is to
2286// have the same vector type. Check that and remove all but one.
2287#ifndef NDEBUG
2288 for (VectorType *VTy : CandidateTys) {
2289 assert(VTy->getElementType() == CommonEltTy &&
2290 "Unaccounted for element type!");
2291 assert(VTy == CandidateTys[0] &&
2292 "Different vector types with the same element type!");
2293 }
2294#endif
2295 CandidateTys.resize(1);
2296 }
2297
2298 // FIXME: hack. Do we have a named constant for this?
2299 // SDAG SDNode can't have more than 65535 operands.
2300 llvm::erase_if(CandidateTys, [](VectorType *VTy) {
2301 return cast<FixedVectorType>(VTy)->getNumElements() >
2302 std::numeric_limits<unsigned short>::max();
2303 });
2304
2305 // Find a vector type viable for promotion by iterating over all slices.
2306 auto *VTy = llvm::find_if(CandidateTys, [&](VectorType *VTy) -> bool {
2307 uint64_t ElementSize =
2308 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2309
2310 // While LLVM vectors are defined as bit-packed, we don't support element
2311 // sizes that aren't byte sized.
2312 if (ElementSize % 8)
2313 return false;
2314 assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2315 "vector size not a multiple of element size?");
2316 ElementSize /= 8;
2317
2318 for (const Slice &S : P)
2319 if (!isVectorPromotionViableForSlice(P, S, VTy, ElementSize, DL, VScale))
2320 return false;
2321
2322 for (const Slice *S : P.splitSliceTails())
2323 if (!isVectorPromotionViableForSlice(P, *S, VTy, ElementSize, DL, VScale))
2324 return false;
2325
2326 return true;
2327 });
2328 return VTy != CandidateTys.end() ? *VTy : nullptr;
2329}
2330
2331static VectorType *createAndCheckVectorTypesForPromotion(
2332 SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
2333 function_ref<void(Type *)> CheckCandidateType, Partition &P,
2334 const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
2335 bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
2336 bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
2337 [[maybe_unused]] VectorType *OriginalElt =
2338 CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;
2339 // Consider additional vector types where the element type size is a
2340 // multiple of load/store element size.
2341 for (Type *Ty : OtherTys) {
2342 if (!VectorType::isValidElementType(Ty))
2343 continue;
2344 unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
2345 // Make a copy of CandidateTys and iterate through it, because we
2346 // might append to CandidateTys in the loop.
2347 for (VectorType *const VTy : CandidateTysCopy) {
2348 // The elements in the copy should remain invariant throughout the loop
2349 assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
2350 unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
2351 unsigned ElementSize =
2352 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2353 if (TypeSize != VectorSize && TypeSize != ElementSize &&
2354 VectorSize % TypeSize == 0) {
2355 VectorType *NewVTy = VectorType::get(Ty, VectorSize / TypeSize, false);
2356 CheckCandidateType(NewVTy);
2357 }
2358 }
2359 }
2360
2361 return checkVectorTypesForPromotion(
2362 P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2363 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2364}
2365
2366/// Test whether the given alloca partitioning and range of slices can be
2367/// promoted to a vector.
2368///
2369/// This is a quick test to check whether we can rewrite a particular alloca
2370/// partition (and its newly formed alloca) into a vector alloca with only
2371/// whole-vector loads and stores such that it could be promoted to a vector
2372/// SSA value. We only can ensure this for a limited set of operations, and we
2373/// don't want to do the rewrites unless we are confident that the result will
2374/// be promotable, so we have an early test here.
2375static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL,
2376 unsigned VScale) {
2377 // Collect the candidate types for vector-based promotion. Also track whether
2378 // we have different element types.
2379 SmallVector<VectorType *, 4> CandidateTys;
2380 SetVector<Type *> LoadStoreTys;
2381 SetVector<Type *> DeferredTys;
2382 Type *CommonEltTy = nullptr;
2383 VectorType *CommonVecPtrTy = nullptr;
2384 bool HaveVecPtrTy = false;
2385 bool HaveCommonEltTy = true;
2386 bool HaveCommonVecPtrTy = true;
2387 auto CheckCandidateType = [&](Type *Ty) {
2388 if (auto *VTy = dyn_cast<FixedVectorType>(Ty)) {
2389 // Discard all candidates if this vector type's total size in bits differs from theirs.
2390 if (!CandidateTys.empty()) {
2391 VectorType *V = CandidateTys[0];
2392 if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
2393 DL.getTypeSizeInBits(V).getFixedValue()) {
2394 CandidateTys.clear();
2395 return;
2396 }
2397 }
2398 CandidateTys.push_back(VTy);
2399 Type *EltTy = VTy->getElementType();
2400
2401 if (!CommonEltTy)
2402 CommonEltTy = EltTy;
2403 else if (CommonEltTy != EltTy)
2404 HaveCommonEltTy = false;
2405
2406 if (EltTy->isPointerTy()) {
2407 HaveVecPtrTy = true;
2408 if (!CommonVecPtrTy)
2409 CommonVecPtrTy = VTy;
2410 else if (CommonVecPtrTy != VTy)
2411 HaveCommonVecPtrTy = false;
2412 }
2413 }
2414 };
2415
2416 // Put load and store types into a set for de-duplication.
2417 for (const Slice &S : P) {
2418 Type *Ty;
2419 if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2420 Ty = LI->getType();
2421 else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2422 Ty = SI->getValueOperand()->getType();
2423 else
2424 continue;
2425
2426 auto CandTy = Ty->getScalarType();
2427 if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
2428 S.endOffset() != P.endOffset())) {
2429 DeferredTys.insert(Ty);
2430 continue;
2431 }
2432
2433 LoadStoreTys.insert(Ty);
2434 // Consider any loads or stores that are the exact size of the slice.
2435 if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
2436 CheckCandidateType(Ty);
2437 }
2438
2439 SmallVector<VectorType *, 4> CandidateTysCopy = CandidateTys;
2440 if (auto *VTy = createAndCheckVectorTypesForPromotion(
2441 LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
2442 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2443 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2444 return VTy;
2445
2446 CandidateTys.clear();
2447 return createAndCheckVectorTypesForPromotion(
2448 DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
2449 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2450 CommonVecPtrTy, VScale);
2451}
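// For illustration only (hypothetical IR): for "%a = alloca <4 x float>" whose
// only users are whole-vector loads/stores of <4 x float> and element-sized
// loads/stores of float, <4 x float> is reported as a viable candidate: every
// slice maps either to the entire vector or to a single element, so the
// partition can later be rewritten with extractelement/insertelement and
// promoted by mem2reg.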
2452
2453/// Test whether a slice of an alloca is valid for integer widening.
2454///
2455/// This implements the necessary checking for the \c isIntegerWideningViable
2456/// test below on a single slice of the alloca.
2457static bool isIntegerWideningViableForSlice(const Slice &S,
2458 uint64_t AllocBeginOffset,
2459 Type *AllocaTy,
2460 const DataLayout &DL,
2461 bool &WholeAllocaOp) {
2462 uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();
2463
2464 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2465 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2466
2467 Use *U = S.getUse();
2468
2469 // Lifetime intrinsics operate over the whole alloca whose sizes are usually
2470 // larger than other load/store slices (RelEnd > Size). But lifetime
2471 // intrinsics are always promotable and should not impact the promotability
2472 // of the other slices in the partition.
2473 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2474 if (II->isLifetimeStartOrEnd() || II->isDroppable())
2475 return true;
2476 }
2477
2478 // We can't reasonably handle cases where the load or store extends past
2479 // the end of the alloca's type and into its padding.
2480 if (RelEnd > Size)
2481 return false;
2482
2483 if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2484 if (LI->isVolatile())
2485 return false;
2486 // We can't handle loads that extend past the allocated memory.
2487 TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
2488 if (!LoadSize.isFixed() || LoadSize.getFixedValue() > Size)
2489 return false;
2490 // So far, AllocaSliceRewriter does not support widening split slice tails
2491 // in rewriteIntegerLoad.
2492 if (S.beginOffset() < AllocBeginOffset)
2493 return false;
2494 // Note that we don't count vector loads or stores as whole-alloca
2495 // operations which enable integer widening because we would prefer to use
2496 // vector widening instead.
2497 if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
2498 WholeAllocaOp = true;
2499 if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
2500 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2501 return false;
2502 } else if (RelBegin != 0 || RelEnd != Size ||
2503 !canConvertValue(DL, AllocaTy, LI->getType())) {
2504 // Non-integer loads need to be convertible from the alloca type so that
2505 // they are promotable.
2506 return false;
2507 }
2508 } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2509 Type *ValueTy = SI->getValueOperand()->getType();
2510 if (SI->isVolatile())
2511 return false;
2512 // We can't handle stores that extend past the allocated memory.
2513 TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
2514 if (!StoreSize.isFixed() || StoreSize.getFixedValue() > Size)
2515 return false;
2516 // So far, AllocaSliceRewriter does not support widening split slice tails
2517 // in rewriteIntegerStore.
2518 if (S.beginOffset() < AllocBeginOffset)
2519 return false;
2520 // Note that we don't count vector loads or stores as whole-alloca
2521 // operations which enable integer widening because we would prefer to use
2522 // vector widening instead.
2523 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
2524 WholeAllocaOp = true;
2525 if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2526 if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2527 return false;
2528 } else if (RelBegin != 0 || RelEnd != Size ||
2529 !canConvertValue(DL, ValueTy, AllocaTy)) {
2530 // Non-integer stores need to be convertible to the alloca type so that
2531 // they are promotable.
2532 return false;
2533 }
2534 } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2535 if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
2536 return false;
2537 if (!S.isSplittable())
2538 return false; // Skip any unsplittable intrinsics.
2539 } else {
2540 return false;
2541 }
2542
2543 return true;
2544}
2545
2546/// Test whether the given alloca partition's integer operations can be
2547/// widened to promotable ones.
2548///
2549/// This is a quick test to check whether we can rewrite the integer loads and
2550/// stores to a particular alloca into wider loads and stores and be able to
2551/// promote the resulting alloca.
2552static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
2553 const DataLayout &DL) {
2554 uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2555 // Don't create integer types larger than the maximum bitwidth.
2556 if (SizeInBits > IntegerType::MAX_INT_BITS)
2557 return false;
2558
2559 // Don't try to handle allocas with bit-padding.
2560 if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2561 return false;
2562
2563 // We need to ensure that an integer type with the appropriate bitwidth can
2564 // be converted to the alloca type, whatever that is. We don't want to force
2565 // the alloca itself to have an integer type if there is a more suitable one.
2566 Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);
2567 if (!canConvertValue(DL, AllocaTy, IntTy) ||
2568 !canConvertValue(DL, IntTy, AllocaTy))
2569 return false;
2570
2571 // While examining uses, we ensure that the alloca has a covering load or
2572 // store. We don't want to widen the integer operations only to fail to
2573 // promote due to some other unsplittable entry (which we may make splittable
2574 // later). However, if there are only splittable uses, go ahead and assume
2575 // that we cover the alloca.
2576 // FIXME: We shouldn't consider split slices that happen to start in the
2577 // partition here...
2578 bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);
2579
2580 for (const Slice &S : P)
2581 if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
2582 WholeAllocaOp))
2583 return false;
2584
2585 for (const Slice *S : P.splitSliceTails())
2586 if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
2587 WholeAllocaOp))
2588 return false;
2589
2590 return WholeAllocaOp;
2591}
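// For illustration only (hypothetical IR): for "%a = alloca i32" used as
//   store i16 %lo, ptr %a                    ; bytes [0,2)
//   %a.hi = getelementptr i8, ptr %a, i64 2
//   store i16 %hi, ptr %a.hi                 ; bytes [2,4)
//   %v = load i32, ptr %a                    ; covers the whole alloca
// the i32 load is a whole-alloca operation and both i16 stores are convertible
// via insertInteger below, so the function returns true and the partial stores
// are later rewritten as mask-and-or updates of a single i32 value.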
2592
2593static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
2594 IntegerType *Ty, uint64_t Offset,
2595 const Twine &Name) {
2596 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2597 IntegerType *IntTy = cast<IntegerType>(V->getType());
2598 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2599 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2600 "Element extends past full value");
2601 uint64_t ShAmt = 8 * Offset;
2602 if (DL.isBigEndian())
2603 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2604 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2605 if (ShAmt) {
2606 V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
2607 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2608 }
2609 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2610 "Cannot extract to a larger integer!");
2611 if (Ty != IntTy) {
2612 V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
2613 LLVM_DEBUG(dbgs() << " trunced: " << *V << "\n");
2614 }
2615 return V;
2616}
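// Worked example (assuming a little-endian target and hypothetical names):
// extracting an i16 at byte offset 2 from an i64 %v yields
//   %x.shift = lshr i64 %v, 16
//   %x.trunc = trunc i64 %x.shift to i16
// On a big-endian target the shift amount would instead be
// 8 * (8 - 2 - 2) = 32 bits.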
2617
2618static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
2619 Value *V, uint64_t Offset, const Twine &Name) {
2620 IntegerType *IntTy = cast<IntegerType>(Old->getType());
2621 IntegerType *Ty = cast<IntegerType>(V->getType());
2622 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2623 "Cannot insert a larger integer!");
2624 LLVM_DEBUG(dbgs() << " start: " << *V << "\n");
2625 if (Ty != IntTy) {
2626 V = IRB.CreateZExt(V, IntTy, Name + ".ext");
2627 LLVM_DEBUG(dbgs() << " extended: " << *V << "\n");
2628 }
2629 assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
2630 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2631 "Element store outside of alloca store");
2632 uint64_t ShAmt = 8 * Offset;
2633 if (DL.isBigEndian())
2634 ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
2635 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
2636 if (ShAmt) {
2637 V = IRB.CreateShl(V, ShAmt, Name + ".shift");
2638 LLVM_DEBUG(dbgs() << " shifted: " << *V << "\n");
2639 }
2640
2641 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2642 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2643 Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
2644 LLVM_DEBUG(dbgs() << " masked: " << *Old << "\n");
2645 V = IRB.CreateOr(Old, V, Name + ".insert");
2646 LLVM_DEBUG(dbgs() << " inserted: " << *V << "\n");
2647 }
2648 return V;
2649}
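// Worked example (assuming a little-endian target and hypothetical names):
// inserting an i16 %v at byte offset 2 into an i64 %old yields
//   %x.ext = zext i16 %v to i64
//   %x.shift = shl i64 %x.ext, 16
//   %x.mask = and i64 %old, 0xFFFFFFFF0000FFFF   ; clears bits [16,32)
//   %x.insert = or i64 %x.mask, %x.shift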
2650
2651static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
2652 unsigned EndIndex, const Twine &Name) {
2653 auto *VecTy = cast<FixedVectorType>(V->getType());
2654 unsigned NumElements = EndIndex - BeginIndex;
2655 assert(NumElements <= VecTy->getNumElements() && "Too many elements!");
2656
2657 if (NumElements == VecTy->getNumElements())
2658 return V;
2659
2660 if (NumElements == 1) {
2661 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2662 Name + ".extract");
2663 LLVM_DEBUG(dbgs() << " extract: " << *V << "\n");
2664 return V;
2665 }
2666
2667 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2668 V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
2669 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2670 return V;
2671}
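// For illustration only (hypothetical values): extracting elements [2,4) of an
// <8 x float> %v becomes
//   %x.extract = shufflevector <8 x float> %v, <8 x float> poison,
//                              <2 x i32> <i32 2, i32 3>
// while extracting a single element uses extractelement instead.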
2672
2673static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
2674 unsigned BeginIndex, const Twine &Name) {
2675 VectorType *VecTy = cast<VectorType>(Old->getType());
2676 assert(VecTy && "Can only insert a vector into a vector");
2677
2678 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2679 if (!Ty) {
2680 // Single element to insert.
2681 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2682 Name + ".insert");
2683 LLVM_DEBUG(dbgs() << " insert: " << *V << "\n");
2684 return V;
2685 }
2686
2687 unsigned NumSubElements = cast<FixedVectorType>(Ty)->getNumElements();
2688 unsigned NumElements = cast<FixedVectorType>(VecTy)->getNumElements();
2689
2690 assert(NumSubElements <= NumElements && "Too many elements!");
2691 if (NumSubElements == NumElements) {
2692 assert(V->getType() == VecTy && "Vector type mismatch");
2693 return V;
2694 }
2695 unsigned EndIndex = BeginIndex + NumSubElements;
2696
2697 // When inserting a smaller vector into the larger one to store, we first
2698 // use a shuffle vector to widen it with undef elements, and then
2699 // a second shuffle vector to select between the loaded vector and the
2700 // incoming vector.
2701 SmallVector<int, 8> Mask;
2702 Mask.reserve(NumElements);
2703 for (unsigned Idx = 0; Idx != NumElements; ++Idx)
2704 if (Idx >= BeginIndex && Idx < EndIndex)
2705 Mask.push_back(Idx - BeginIndex);
2706 else
2707 Mask.push_back(-1);
2708 V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
2709 LLVM_DEBUG(dbgs() << " shuffle: " << *V << "\n");
2710
2711 Mask.clear();
2712 for (unsigned Idx = 0; Idx != NumElements; ++Idx)
2713 if (Idx >= BeginIndex && Idx < EndIndex)
2714 Mask.push_back(Idx);
2715 else
2716 Mask.push_back(Idx + NumElements);
2717 V = IRB.CreateShuffleVector(V, Old, Mask, Name + "blend");
2718 LLVM_DEBUG(dbgs() << " blend: " << *V << "\n");
2719 return V;
2720}
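// For illustration only (hypothetical values): inserting a <2 x float> %v at
// element index 2 of an <8 x float> %old uses two shuffles:
//   expand mask (on %v):  <poison, poison, 0, 1, poison, poison, poison, poison>
//   blend mask (%v first, %old second):  <8, 9, 2, 3, 12, 13, 14, 15>
// so lanes 2 and 3 come from %v and every other lane is taken from %old.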
2721
2722/// This function takes two vector values and combines them into a single vector
2723/// by concatenating their elements. The function handles:
2724///
2725/// 1. Element type mismatch: If either vector's element type differs from
2726/// NewAIEltType, the function bitcasts the vector to use NewAIEltType while
2727/// preserving the total bit width (adjusting the number of elements
2728/// accordingly).
2729///
2730/// 2. Size mismatch: After transforming the vectors to have the desired element
2731/// type, if the two vectors have different numbers of elements, the smaller
2732/// vector is extended with poison values to match the size of the larger
2733/// vector before concatenation.
2734///
2735/// 3. Concatenation: The vectors are merged using a shuffle operation that
2736/// places all elements of V0 first, followed by all elements of V1.
2737///
2738/// \param V0 The first vector to merge (must be a vector type)
2739/// \param V1 The second vector to merge (must be a vector type)
2740/// \param DL The data layout for size calculations
2741/// \param NewAIEltTy The desired element type for the result vector
2742/// \param Builder IRBuilder for creating new instructions
2743/// \return A new vector containing all elements from V0 followed by all
2744/// elements from V1
2746 Type *NewAIEltTy, IRBuilder<> &Builder) {
2747 // V0 and V1 are vectors
2748 // Create a new vector type with combined elements
2749 // Use ShuffleVector to concatenate the vectors
2750 auto *VecType0 = cast<FixedVectorType>(V0->getType());
2751 auto *VecType1 = cast<FixedVectorType>(V1->getType());
2752
2753 // If V0/V1 element types are different from NewAllocaElementType,
2754 // we need to introduce bitcasts before merging them
2755 auto BitcastIfNeeded = [&](Value *&V, FixedVectorType *&VecType,
2756 const char *DebugName) {
2757 Type *EltType = VecType->getElementType();
2758 if (EltType != NewAIEltTy) {
2759 // Calculate new number of elements to maintain same bit width
2760 unsigned TotalBits =
2761 VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
2762 unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
2763
2764 auto *NewVecType = FixedVectorType::get(NewAIEltTy, NewNumElts);
2765 V = Builder.CreateBitCast(V, NewVecType);
2766 VecType = NewVecType;
2767 LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");
2768 }
2769 };
2770
2771 BitcastIfNeeded(V0, VecType0, "V0");
2772 BitcastIfNeeded(V1, VecType1, "V1");
2773
2774 unsigned NumElts0 = VecType0->getNumElements();
2775 unsigned NumElts1 = VecType1->getNumElements();
2776
2777 SmallVector<int, 16> ShuffleMask;
2778
2779 if (NumElts0 == NumElts1) {
2780 for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2781 ShuffleMask.push_back(i);
2782 } else {
2783 // If two vectors have different sizes, we need to extend
2784 // the smaller vector to the size of the larger vector.
2785 unsigned SmallSize = std::min(NumElts0, NumElts1);
2786 unsigned LargeSize = std::max(NumElts0, NumElts1);
2787 bool IsV0Smaller = NumElts0 < NumElts1;
2788 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2789 SmallVector<int, 16> ExtendMask;
2790 for (unsigned i = 0; i < SmallSize; ++i)
2791 ExtendMask.push_back(i);
2792 for (unsigned i = SmallSize; i < LargeSize; ++i)
2793 ExtendMask.push_back(PoisonMaskElem);
2794 ExtendedVec = Builder.CreateShuffleVector(
2795 ExtendedVec, PoisonValue::get(ExtendedVec->getType()), ExtendMask);
2796 LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
2797 for (unsigned i = 0; i < NumElts0; ++i)
2798 ShuffleMask.push_back(i);
2799 for (unsigned i = 0; i < NumElts1; ++i)
2800 ShuffleMask.push_back(LargeSize + i);
2801 }
2802
2803 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2804}
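// For illustration only (hypothetical values): merging a <2 x float> %v0 with
// a <4 x float> %v1 when the allocated element type is float first widens %v0
// to <4 x float> with a poison-padded shuffle, then concatenates with the mask
// <0, 1, 4, 5, 6, 7>, producing a <6 x float> whose first two lanes come from
// %v0 and whose remaining four lanes come from %v1.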
2805
2806namespace {
2807
2808/// Visitor to rewrite instructions using a particular slice of an alloca
2809/// to use a new alloca.
2810///
2811/// Also implements the rewriting to vector-based accesses when the partition
2812/// passes the isVectorPromotionViable predicate. Most of the rewriting logic
2813/// lives here.
2814class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
2815 // Befriend the base class so it can delegate to private visit methods.
2816 friend class InstVisitor<AllocaSliceRewriter, bool>;
2817
2818 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2819
2820 const DataLayout &DL;
2821 AllocaSlices &AS;
2822 SROA &Pass;
2823 AllocaInst &OldAI, &NewAI;
2824 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2825 Type *NewAllocaTy;
2826
2827 // This is a convenience and flag variable that will be null unless the new
2828 // alloca's integer operations should be widened to this integer type due to
2829 // passing isIntegerWideningViable above. If it is non-null, the desired
2830 // integer type will be stored here for easy access during rewriting.
2831 IntegerType *IntTy;
2832
2833 // If we are rewriting an alloca partition which can be written as pure
2834 // vector operations, we stash extra information here. When VecTy is
2835 // non-null, we have some strict guarantees about the rewritten alloca:
2836 // - The new alloca is exactly the size of the vector type here.
2837 // - The accesses all either map to the entire vector or to a single
2838 // element.
2839 // - The set of accessing instructions is only one of those handled above
2840 // in isVectorPromotionViable. Generally these are the same access kinds
2841 // which are promotable via mem2reg.
2842 VectorType *VecTy;
2843 Type *ElementTy;
2844 uint64_t ElementSize;
2845
2846 // The original offset of the slice currently being rewritten relative to
2847 // the original alloca.
2848 uint64_t BeginOffset = 0;
2849 uint64_t EndOffset = 0;
2850
2851 // The new offsets of the slice currently being rewritten relative to the
2852 // original alloca.
2853 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2854
2855 uint64_t SliceSize = 0;
2856 bool IsSplittable = false;
2857 bool IsSplit = false;
2858 Use *OldUse = nullptr;
2859 Instruction *OldPtr = nullptr;
2860
2861 // Track post-rewrite users which are PHI nodes and Selects.
2862 SmallSetVector<PHINode *, 8> &PHIUsers;
2863 SmallSetVector<SelectInst *, 8> &SelectUsers;
2864
2865 // Utility IR builder, whose name prefix is setup for each visited use, and
2866 // the insertion point is set to point to the user.
2867 IRBuilderTy IRB;
2868
2869 // Return the new alloca, addrspacecasted if required to avoid changing the
2870 // addrspace of a volatile access.
2871 Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
2872 if (!IsVolatile || AddrSpace == NewAI.getType()->getPointerAddressSpace())
2873 return &NewAI;
2874
2875 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2876 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2877 }
2878
2879public:
2880 AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
2881 AllocaInst &OldAI, AllocaInst &NewAI, Type *NewAllocaTy,
2882 uint64_t NewAllocaBeginOffset,
2883 uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
2884 VectorType *PromotableVecTy,
2885 SmallSetVector<PHINode *, 8> &PHIUsers,
2886 SmallSetVector<SelectInst *, 8> &SelectUsers)
2887 : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
2888 NewAllocaBeginOffset(NewAllocaBeginOffset),
2889 NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAllocaTy),
2890 IntTy(IsIntegerPromotable
2891 ? Type::getIntNTy(
2892 NewAI.getContext(),
2893 DL.getTypeSizeInBits(NewAllocaTy).getFixedValue())
2894 : nullptr),
2895 VecTy(PromotableVecTy),
2896 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2897 ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2898 : 0),
2899 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2900 IRB(NewAI.getContext(), ConstantFolder()) {
2901 if (VecTy) {
2902 assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2903 "Only multiple-of-8 sized vector elements are viable");
2904 ++NumVectorized;
2905 }
2906 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2907 }
2908
2909 bool visit(AllocaSlices::const_iterator I) {
2910 bool CanSROA = true;
2911 BeginOffset = I->beginOffset();
2912 EndOffset = I->endOffset();
2913 IsSplittable = I->isSplittable();
2914 IsSplit =
2915 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2916 LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));
2917 LLVM_DEBUG(AS.printSlice(dbgs(), I, ""));
2918 LLVM_DEBUG(dbgs() << "\n");
2919
2920 // Compute the intersecting offset range.
2921 assert(BeginOffset < NewAllocaEndOffset);
2922 assert(EndOffset > NewAllocaBeginOffset);
2923 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2924 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2925
2926 SliceSize = NewEndOffset - NewBeginOffset;
2927 LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
2928 << ") NewBegin:(" << NewBeginOffset << ", "
2929 << NewEndOffset << ") NewAllocaBegin:("
2930 << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
2931 << ")\n");
2932 assert(IsSplit || NewBeginOffset == BeginOffset);
2933 OldUse = I->getUse();
2934 OldPtr = cast<Instruction>(OldUse->get());
2935
2936 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2937 IRB.SetInsertPoint(OldUserI);
2938 IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
2939 IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
2940 Twine(BeginOffset) + ".");
2941
2942 CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
2943 if (VecTy || IntTy)
2944 assert(CanSROA);
2945 return CanSROA;
2946 }
2947
2948 /// Attempts to rewrite a partition using tree-structured merge optimization.
2949 ///
2950 /// This function analyzes a partition to determine if it can be optimized
2951 /// using a tree-structured merge pattern, where multiple non-overlapping
2952 /// stores completely fill an alloca and no load from the alloca occurs in
2953 /// the middle of the stores. Such patterns can be optimized by eliminating
2954 /// the intermediate stores and directly constructing the final vector by
2955 /// using shufflevectors.
2956 ///
2957 /// Example transformation:
2958 /// Before: (stores do not have to be in order)
2959 /// %alloca = alloca <8 x float>
2960 /// store <2 x float> %val0, ptr %alloca ; offset 0-1
2961 /// store <2 x float> %val2, ptr %alloca+16 ; offset 4-5
2962 /// store <2 x float> %val1, ptr %alloca+8 ; offset 2-3
2963 /// store <2 x float> %val3, ptr %alloca+24 ; offset 6-7
2964 ///
2965 /// After:
2966 /// %alloca = alloca <8 x float>
2967 /// %shuffle0 = shufflevector %val0, %val1, <4 x i32> <i32 0, i32 1, i32 2,
2968 /// i32 3>
2969 /// %shuffle1 = shufflevector %val2, %val3, <4 x i32> <i32 0, i32 1, i32 2,
2970 /// i32 3>
2971 /// %shuffle2 = shufflevector %shuffle0, %shuffle1, <8 x i32> <i32 0, i32 1,
2972 /// i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
2973 /// store %shuffle2, ptr %alloca
2974 ///
2975 /// The optimization looks for partitions that:
2976 /// 1. Have no overlapping split slice tails
2977 /// 2. Contain non-overlapping stores that cover the entire alloca
2978 /// 3. Have exactly one load that reads the complete alloca structure and is
2979 ///    not placed in the middle of the stores (TODO: maybe we can relax the
2980 ///    constraint about reading the entire alloca structure)
2981 ///
2982 /// \param P The partition to analyze and potentially rewrite
2983 /// \return An optional vector of values that were deleted during the rewrite
2984 /// process, or std::nullopt if the partition cannot be optimized
2985 /// using tree-structured merge
2986 std::optional<SmallVector<Value *, 4>>
2987 rewriteTreeStructuredMerge(Partition &P) {
2988 // No tail slices that overlap with the partition
2989     if (!P.splitSliceTails().empty())
2990 return std::nullopt;
2991
2992 SmallVector<Value *, 4> DeletedValues;
2993 LoadInst *TheLoad = nullptr;
2994
2995 // Structure to hold store information
2996 struct StoreInfo {
2997 StoreInst *Store;
2998 uint64_t BeginOffset;
2999 uint64_t EndOffset;
3000 Value *StoredValue;
3001 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
3002 : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
3003 };
3004
3005 SmallVector<StoreInfo, 4> StoreInfos;
3006
3007 // If the new alloca is a fixed vector type, we use its element type as the
3008     // allocated element type; otherwise we use i8 as the allocated element type.
3009 Type *AllocatedEltTy =
3010 isa<FixedVectorType>(NewAllocaTy)
3011 ? cast<FixedVectorType>(NewAllocaTy)->getElementType()
3012 : Type::getInt8Ty(NewAI.getContext());
3013 unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);
3014
3015     // Helper to check that a type is:
3016     // 1. A fixed vector type,
3017     // 2. whose element type is not a pointer, and
3018     // 3. whose element type size is a multiple of 8 bits.
3019     // We only handle loads/stores whose types meet these conditions.
3020 auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
3021 auto *FixedVecTy = dyn_cast<FixedVectorType>(Ty);
3022 return FixedVecTy &&
3023 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
3024 !FixedVecTy->getElementType()->isPointerTy();
3025 };
3026
3027 for (Slice &S : P) {
3028 auto *User = cast<Instruction>(S.getUse()->getUser());
3029 if (auto *LI = dyn_cast<LoadInst>(User)) {
3030 // Do not handle the case if
3031 // 1. There is more than one load
3032 // 2. The load is volatile
3033 // 3. The load does not read the entire alloca structure
3034 // 4. The load does not meet the conditions in the helper function
3035 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
3036 S.beginOffset() != NewAllocaBeginOffset ||
3037 S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
3038 return std::nullopt;
3039 TheLoad = LI;
3040 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
3041 // Do not handle the case if
3042 // 1. The store does not meet the conditions in the helper function
3043 // 2. The store is volatile
3044 // 3. The total store size is not a multiple of the allocated element
3045 // type size
3046 if (!IsTypeValidForTreeStructuredMerge(
3047 SI->getValueOperand()->getType()) ||
3048 SI->isVolatile())
3049 return std::nullopt;
3050 auto *VecTy = cast<FixedVectorType>(SI->getValueOperand()->getType());
3051 unsigned NumElts = VecTy->getNumElements();
3052 unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
3053 if (NumElts * EltSize % AllocatedEltTySize != 0)
3054 return std::nullopt;
3055 StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
3056 SI->getValueOperand());
3057 } else {
3058 // If we have instructions other than load and store, we cannot do the
3059 // tree structured merge
3060 return std::nullopt;
3061 }
3062 }
3063 // If we do not have any load, we cannot do the tree structured merge
3064 if (!TheLoad)
3065 return std::nullopt;
3066
3067 // If we do not have multiple stores, we cannot do the tree structured merge
3068 if (StoreInfos.size() < 2)
3069 return std::nullopt;
3070
3071 // Stores should not overlap and should cover the whole alloca
3072 // Sort by begin offset
3073 llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
3074 return A.BeginOffset < B.BeginOffset;
3075 });
3076
3077 // Check for overlaps and coverage
3078 uint64_t ExpectedStart = NewAllocaBeginOffset;
3079 for (auto &StoreInfo : StoreInfos) {
3080 uint64_t BeginOff = StoreInfo.BeginOffset;
3081 uint64_t EndOff = StoreInfo.EndOffset;
3082
3083 // Check for gap or overlap
3084 if (BeginOff != ExpectedStart)
3085 return std::nullopt;
3086
3087 ExpectedStart = EndOff;
3088 }
3089 // Check that stores cover the entire alloca
3090 if (ExpectedStart != NewAllocaEndOffset)
3091 return std::nullopt;
3092
3093 // Stores should be in the same basic block
3094 // The load should not be in the middle of the stores
3095 // Note:
3096     // If the load is in a different basic block from the stores, we can still
3097     // do the tree-structured merge. This is because we do not perform
3098     // store->load forwarding here. The merged vector will be stored back to
3099 // NewAI and the new load will load from NewAI. The forwarding will be
3100 // handled later when we try to promote NewAI.
3101 BasicBlock *LoadBB = TheLoad->getParent();
3102 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
3103
3104 for (auto &StoreInfo : StoreInfos) {
3105 if (StoreInfo.Store->getParent() != StoreBB)
3106 return std::nullopt;
3107 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
3108 return std::nullopt;
3109 }
3110
3111     // If we reach here, the partition can be rewritten with a tree-structured
3112     // merge
3113 LLVM_DEBUG({
3114 dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
3115 << "\n Ordered stores:\n";
3116 for (auto [i, Info] : enumerate(StoreInfos))
3117 dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
3118 << Info.EndOffset << ") \tStore: " << *Info.Store
3119 << "\tValue: " << *Info.StoredValue << "\n";
3120 });
3121
3122 // Instead of having these stores, we merge all the stored values into a
3123 // vector and store the merged value into the alloca
3124 std::queue<Value *> VecElements;
3125 IRBuilder<> Builder(StoreInfos.back().Store);
3126 for (const auto &Info : StoreInfos) {
3127 DeletedValues.push_back(Info.Store);
3128 VecElements.push(Info.StoredValue);
3129 }
3130
3131 LLVM_DEBUG(dbgs() << " Rewrite stores into shufflevectors:\n");
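    // For illustration: the queue below is reduced pairwise until a single
    // value remains. With three stored vectors A, B and C, the first round
    // produces merge(A, B) and carries C over, and the second round produces
    // merge(merge(A, B), C).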
3132 while (VecElements.size() > 1) {
3133 const auto NumElts = VecElements.size();
3134 for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
3135 Value *V0 = VecElements.front();
3136 VecElements.pop();
3137 Value *V1 = VecElements.front();
3138 VecElements.pop();
3139 Value *Merged = mergeTwoVectors(V0, V1, DL, AllocatedEltTy, Builder);
3140 LLVM_DEBUG(dbgs() << " shufflevector: " << *Merged << "\n");
3141 VecElements.push(Merged);
3142 }
3143 if (NumElts % 2 == 1) {
3144 Value *V = VecElements.front();
3145 VecElements.pop();
3146 VecElements.push(V);
3147 }
3148 }
3149
3150 // Store the merged value into the alloca
3151 Value *MergedValue = VecElements.front();
3152 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3153
3154 IRBuilder<> LoadBuilder(TheLoad);
3155 TheLoad->replaceAllUsesWith(LoadBuilder.CreateAlignedLoad(
3156 TheLoad->getType(), &NewAI, getSliceAlign(), TheLoad->isVolatile(),
3157 TheLoad->getName() + ".sroa.new.load"));
3158 DeletedValues.push_back(TheLoad);
3159
3160 return DeletedValues;
3161 }
3162
3163private:
3164 // Make sure the other visit overloads are visible.
3165 using Base::visit;
3166
3167 // Every instruction which can end up as a user must have a rewrite rule.
3168 bool visitInstruction(Instruction &I) {
3169 LLVM_DEBUG(dbgs() << " !!!! Cannot rewrite: " << I << "\n");
3170 llvm_unreachable("No rewrite rule for this instruction!");
3171 }
3172
3173 Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
3174 // Note that the offset computation can use BeginOffset or NewBeginOffset
3175 // interchangeably for unsplit slices.
3176 assert(IsSplit || BeginOffset == NewBeginOffset);
3177 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3178
3179 StringRef OldName = OldPtr->getName();
3180 // Skip through the last '.sroa.' component of the name.
3181 size_t LastSROAPrefix = OldName.rfind(".sroa.");
3182 if (LastSROAPrefix != StringRef::npos) {
3183 OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
3184 // Look for an SROA slice index.
3185 size_t IndexEnd = OldName.find_first_not_of("0123456789");
3186 if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
3187 // Strip the index and look for the offset.
3188 OldName = OldName.substr(IndexEnd + 1);
3189 size_t OffsetEnd = OldName.find_first_not_of("0123456789");
3190 if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
3191 // Strip the offset.
3192 OldName = OldName.substr(OffsetEnd + 1);
3193 }
3194 }
3195 // Strip any SROA suffixes as well.
3196 OldName = OldName.substr(0, OldName.find(".sroa_"));
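    // For illustration (hypothetical name): an old pointer named
    // "x.sroa.3.16.copyload" has been reduced to "copyload" at this point,
    // which is then used to prefix the name of the adjusted pointer below.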
3197
3198 return getAdjustedPtr(IRB, DL, &NewAI,
3199 APInt(DL.getIndexTypeSizeInBits(PointerTy), Offset),
3200 PointerTy, Twine(OldName) + ".");
3201 }
3202
3203 /// Compute suitable alignment to access this slice of the *new*
3204 /// alloca.
3205 ///
3206   /// This is the alignment of the new alloca reduced to account for the
3207   /// relative offset of the slice within it.
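  /// For illustration: a new alloca aligned to 8 bytes that is accessed at a
  /// relative offset of 4 yields an alignment of 4.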
3208 Align getSliceAlign() {
3209 return commonAlignment(NewAI.getAlign(),
3210 NewBeginOffset - NewAllocaBeginOffset);
3211 }
3212
3213 unsigned getIndex(uint64_t Offset) {
3214 assert(VecTy && "Can only call getIndex when rewriting a vector");
3215 uint64_t RelOffset = Offset - NewAllocaBeginOffset;
3216 assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
3217 uint32_t Index = RelOffset / ElementSize;
3218 assert(Index * ElementSize == RelOffset);
3219 return Index;
3220 }
3221
3222   void deleteIfTriviallyDead(Value *V) {
3223     Instruction *I = cast<Instruction>(V);
3224     if (isInstructionTriviallyDead(I))
3225       Pass.DeadInsts.push_back(I);
3226   }
3227
3228 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
3229 unsigned BeginIndex = getIndex(NewBeginOffset);
3230 unsigned EndIndex = getIndex(NewEndOffset);
3231 assert(EndIndex > BeginIndex && "Empty vector!");
3232
3233 LoadInst *Load =
3234 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3235
3236 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3237 LLVMContext::MD_access_group});
3238 return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
3239 }
3240
3241 Value *rewriteIntegerLoad(LoadInst &LI) {
3242 assert(IntTy && "We cannot insert an integer to the alloca");
3243 assert(!LI.isVolatile());
3244 Value *V =
3245 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3246 V = convertValue(DL, IRB, V, IntTy);
3247 assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3248 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3249 if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3250 IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
3251 V = extractInteger(DL, IRB, V, ExtractTy, Offset, "extract");
3252 }
3253 // It is possible that the extracted type is not the load type. This
3254 // happens if there is a load past the end of the alloca, and as
3255 // a consequence the slice is narrower but still a candidate for integer
3256 // lowering. To handle this case, we just zero extend the extracted
3257 // integer.
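    // For illustration: with a 2-byte slice feeding an i32 load, the value
    // computed above is an i16 that the code below zero-extends to i32.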
3258 assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
3259 "Can only handle an extract for an overly wide load");
3260 if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
3261 V = IRB.CreateZExt(V, LI.getType());
3262 return V;
3263 }
3264
3265 bool visitLoadInst(LoadInst &LI) {
3266 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
3267 Value *OldOp = LI.getOperand(0);
3268 assert(OldOp == OldPtr);
3269
3270 AAMDNodes AATags = LI.getAAMetadata();
3271
3272 unsigned AS = LI.getPointerAddressSpace();
3273
3274 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
3275 : LI.getType();
3276 bool IsPtrAdjusted = false;
3277 Value *V;
3278 if (VecTy) {
3279 V = rewriteVectorizedLoadInst(LI);
3280 } else if (IntTy && LI.getType()->isIntegerTy()) {
3281 V = rewriteIntegerLoad(LI);
3282 } else if (NewBeginOffset == NewAllocaBeginOffset &&
3283 NewEndOffset == NewAllocaEndOffset &&
3284 (canConvertValue(DL, NewAllocaTy, TargetTy) ||
3285 (NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy() &&
3286 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3287 !LI.isVolatile()))) {
3288 Value *NewPtr =
3289 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3290 LoadInst *NewLI = IRB.CreateAlignedLoad(
3291 NewAllocaTy, NewPtr, NewAI.getAlign(), LI.isVolatile(), LI.getName());
3292 if (LI.isVolatile())
3293 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3294 if (NewLI->isAtomic())
3295 NewLI->setAlignment(LI.getAlign());
3296
3297 // Copy any metadata that is valid for the new load. This may require
3298 // conversion to a different kind of metadata, e.g. !nonnull might change
3299 // to !range or vice versa.
3300 copyMetadataForLoad(*NewLI, LI);
3301
3302 // Do this after copyMetadataForLoad() to preserve the TBAA shift.
3303 if (AATags)
3304 NewLI->setAAMetadata(AATags.adjustForAccess(
3305 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3306
3307 // Try to preserve nonnull metadata
3308 V = NewLI;
3309
3310 // If this is an integer load past the end of the slice (which means the
3311 // bytes outside the slice are undef or this load is dead) just forcibly
3312 // fix the integer size with correct handling of endianness.
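      // For illustration: loading i32 from an i16-typed new alloca
      // zero-extends to i32 and, on big-endian targets, also shifts left by
      // 16 so the defined bytes land in the most significant half.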
3313 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3314 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3315 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3316 V = IRB.CreateZExt(V, TITy, "load.ext");
3317 if (DL.isBigEndian())
3318 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3319 "endian_shift");
3320 }
3321 } else {
3322 Type *LTy = IRB.getPtrTy(AS);
3323 LoadInst *NewLI =
3324 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3325 getSliceAlign(), LI.isVolatile(), LI.getName());
3326
3327 if (AATags)
3328 NewLI->setAAMetadata(AATags.adjustForAccess(
3329 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3330
3331 if (LI.isVolatile())
3332 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3333 NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3334 LLVMContext::MD_access_group});
3335
3336 V = NewLI;
3337 IsPtrAdjusted = true;
3338 }
3339 V = convertValue(DL, IRB, V, TargetTy);
3340
3341 if (IsSplit) {
3342 assert(!LI.isVolatile());
3343 assert(LI.getType()->isIntegerTy() &&
3344 "Only integer type loads and stores are split");
3345 assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
3346 "Split load isn't smaller than original load");
3347 assert(DL.typeSizeEqualsStoreSize(LI.getType()) &&
3348 "Non-byte-multiple bit width");
3349 // Move the insertion point just past the load so that we can refer to it.
3350 BasicBlock::iterator LIIt = std::next(LI.getIterator());
3351 // Ensure the insertion point comes before any debug-info immediately
3352 // after the load, so that variable values referring to the load are
3353 // dominated by it.
3354 LIIt.setHeadBit(true);
3355 IRB.SetInsertPoint(LI.getParent(), LIIt);
3356 // Create a placeholder value with the same type as LI to use as the
3357 // basis for the new value. This allows us to replace the uses of LI with
3358 // the computed value, and then replace the placeholder with LI, leaving
3359 // LI only used for this computation.
3360 Value *Placeholder =
3361 new LoadInst(LI.getType(), PoisonValue::get(IRB.getPtrTy(AS)), "",
3362 false, Align(1));
3363 V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,
3364 "insert");
3365 LI.replaceAllUsesWith(V);
3366 Placeholder->replaceAllUsesWith(&LI);
3367 Placeholder->deleteValue();
3368 } else {
3369 LI.replaceAllUsesWith(V);
3370 }
3371
3372 Pass.DeadInsts.push_back(&LI);
3373 deleteIfTriviallyDead(OldOp);
3374 LLVM_DEBUG(dbgs() << " to: " << *V << "\n");
3375 return !LI.isVolatile() && !IsPtrAdjusted;
3376 }
3377
3378 bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
3379 AAMDNodes AATags) {
3380 // Capture V for the purpose of debug-info accounting once it's converted
3381 // to a vector store.
3382 Value *OrigV = V;
3383 if (V->getType() != VecTy) {
3384 unsigned BeginIndex = getIndex(NewBeginOffset);
3385 unsigned EndIndex = getIndex(NewEndOffset);
3386 assert(EndIndex > BeginIndex && "Empty vector!");
3387 unsigned NumElements = EndIndex - BeginIndex;
3388 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3389 "Too many elements!");
3390 Type *SliceTy = (NumElements == 1)
3391 ? ElementTy
3392 : FixedVectorType::get(ElementTy, NumElements);
3393 if (V->getType() != SliceTy)
3394 V = convertValue(DL, IRB, V, SliceTy);
3395
3396 // Mix in the existing elements.
3397 Value *Old =
3398 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3399 V = insertVector(IRB, Old, V, BeginIndex, "vec");
3400 }
3401 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3402 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3403 LLVMContext::MD_access_group});
3404 if (AATags)
3405 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3406 V->getType(), DL));
3407 Pass.DeadInsts.push_back(&SI);
3408
3409 // NOTE: Careful to use OrigV rather than V.
3410 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3411 Store, Store->getPointerOperand(), OrigV, DL);
3412 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3413 return true;
3414 }
3415
3416 bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
3417 assert(IntTy && "We cannot extract an integer from the alloca");
3418 assert(!SI.isVolatile());
3419 if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
3420 IntTy->getBitWidth()) {
3421 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
3422 "oldload");
3423 Old = convertValue(DL, IRB, Old, IntTy);
3424 assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
3425 uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
3426 V = insertInteger(DL, IRB, Old, SI.getValueOperand(), Offset, "insert");
3427 }
3428 V = convertValue(DL, IRB, V, NewAllocaTy);
3429 StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
3430 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3431 LLVMContext::MD_access_group});
3432 if (AATags)
3433 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3434 V->getType(), DL));
3435
3436 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3437 Store, Store->getPointerOperand(),
3438 Store->getValueOperand(), DL);
3439
3440 Pass.DeadInsts.push_back(&SI);
3441 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3442 return true;
3443 }
3444
3445 bool visitStoreInst(StoreInst &SI) {
3446 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
3447 Value *OldOp = SI.getOperand(1);
3448 assert(OldOp == OldPtr);
3449
3450 AAMDNodes AATags = SI.getAAMetadata();
3451 Value *V = SI.getValueOperand();
3452
3453 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3454 // alloca that should be re-examined after promoting this alloca.
3455 if (V->getType()->isPointerTy())
3456 if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
3457 Pass.PostPromotionWorklist.insert(AI);
3458
3459 TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
3460 if (StoreSize.isFixed() && SliceSize < StoreSize.getFixedValue()) {
3461 assert(!SI.isVolatile());
3462 assert(V->getType()->isIntegerTy() &&
3463 "Only integer type loads and stores are split");
3464 assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
3465 "Non-byte-multiple bit width");
3466 IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
3467 V = extractInteger(DL, IRB, V, NarrowTy, NewBeginOffset - BeginOffset,
3468 "extract");
3469 }
3470
3471 if (VecTy)
3472 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3473 if (IntTy && V->getType()->isIntegerTy())
3474 return rewriteIntegerStore(V, SI, AATags);
3475
3476 StoreInst *NewSI;
3477 if (NewBeginOffset == NewAllocaBeginOffset &&
3478 NewEndOffset == NewAllocaEndOffset &&
3479 canConvertValue(DL, V->getType(), NewAllocaTy)) {
3480 V = convertValue(DL, IRB, V, NewAllocaTy);
3481 Value *NewPtr =
3482 getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
3483
3484 NewSI =
3485 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
3486 } else {
3487 unsigned AS = SI.getPointerAddressSpace();
3488 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3489 NewSI =
3490 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
3491 }
3492 NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3493 LLVMContext::MD_access_group});
3494 if (AATags)
3495 NewSI->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3496 V->getType(), DL));
3497 if (SI.isVolatile())
3498 NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
3499 if (NewSI->isAtomic())
3500 NewSI->setAlignment(SI.getAlign());
3501
3502 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
3503 NewSI, NewSI->getPointerOperand(),
3504 NewSI->getValueOperand(), DL);
3505
3506 Pass.DeadInsts.push_back(&SI);
3507 deleteIfTriviallyDead(OldOp);
3508
3509 LLVM_DEBUG(dbgs() << " to: " << *NewSI << "\n");
3510 return NewSI->getPointerOperand() == &NewAI &&
3511 NewSI->getValueOperand()->getType() == NewAllocaTy &&
3512 !SI.isVolatile();
3513 }
3514
3515 /// Compute an integer value from splatting an i8 across the given
3516 /// number of bytes.
3517 ///
3518 /// Note that this routine assumes an i8 is a byte. If that isn't true, don't
3519 /// call this routine.
3520 /// FIXME: Heed the advice above.
3521 ///
3522 /// \param V The i8 value to splat.
3523 /// \param Size The number of bytes in the output (assuming i8 is one byte)
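  /// For illustration: splatting the i8 value 0xAB across 4 bytes zero-extends
  /// it to i32 and multiplies by 0x01010101 (0xFFFFFFFF udiv 0xFF), yielding
  /// 0xABABABAB.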
3524 Value *getIntegerSplat(Value *V, unsigned Size) {
3525 assert(Size > 0 && "Expected a positive number of bytes.");
3526 IntegerType *VTy = cast<IntegerType>(V->getType());
3527 assert(VTy->getBitWidth() == 8 && "Expected an i8 value for the byte");
3528 if (Size == 1)
3529 return V;
3530
3531 Type *SplatIntTy = Type::getIntNTy(VTy->getContext(), Size * 8);
3532 V = IRB.CreateMul(
3533 IRB.CreateZExt(V, SplatIntTy, "zext"),
3534 IRB.CreateUDiv(Constant::getAllOnesValue(SplatIntTy),
3535 IRB.CreateZExt(Constant::getAllOnesValue(V->getType()),
3536 SplatIntTy)),
3537 "isplat");
3538 return V;
3539 }
3540
3541 /// Compute a vector splat for a given element value.
3542 Value *getVectorSplat(Value *V, unsigned NumElements) {
3543 V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
3544 LLVM_DEBUG(dbgs() << " splat: " << *V << "\n");
3545 return V;
3546 }
3547
3548 bool visitMemSetInst(MemSetInst &II) {
3549 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3550 assert(II.getRawDest() == OldPtr);
3551
3552 AAMDNodes AATags = II.getAAMetadata();
3553
3554 // If the memset has a variable size, it cannot be split, just adjust the
3555 // pointer to the new alloca.
3556 if (!isa<ConstantInt>(II.getLength())) {
3557 assert(!IsSplit);
3558 assert(NewBeginOffset == BeginOffset);
3559 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
3560 II.setDestAlignment(getSliceAlign());
3561 // In theory we should call migrateDebugInfo here. However, we do not
3562 // emit dbg.assign intrinsics for mem intrinsics storing through non-
3563 // constant geps, or storing a variable number of bytes.
3565 "AT: Unexpected link to non-const GEP");
3566 deleteIfTriviallyDead(OldPtr);
3567 return false;
3568 }
3569
3570 // Record this instruction for deletion.
3571 Pass.DeadInsts.push_back(&II);
3572
3573 Type *ScalarTy = NewAllocaTy->getScalarType();
3574
3575 const bool CanContinue = [&]() {
3576 if (VecTy || IntTy)
3577 return true;
3578 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3579 return false;
3580 // Length must be in range for FixedVectorType.
3581 auto *C = cast<ConstantInt>(II.getLength());
3582 const uint64_t Len = C->getLimitedValue();
3583 if (Len > std::numeric_limits<unsigned>::max())
3584 return false;
3585 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
3586 auto *SrcTy = FixedVectorType::get(Int8Ty, Len);
3587 return canConvertValue(DL, SrcTy, NewAllocaTy) &&
3588 DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3589 }();
3590
3591 // If this doesn't map cleanly onto the alloca type, and that type isn't
3592 // a single value type, just emit a memset.
3593 if (!CanContinue) {
3594 Type *SizeTy = II.getLength()->getType();
3595 unsigned Sz = NewEndOffset - NewBeginOffset;
3596 Constant *Size = ConstantInt::get(SizeTy, Sz);
3597 MemIntrinsic *New = cast<MemIntrinsic>(IRB.CreateMemSet(
3598 getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
3599 MaybeAlign(getSliceAlign()), II.isVolatile()));
3600 if (AATags)
3601 New->setAAMetadata(
3602 AATags.adjustForAccess(NewBeginOffset - BeginOffset, Sz));
3603
3604 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3605 New, New->getRawDest(), nullptr, DL);
3606
3607 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3608 return false;
3609 }
3610
3611 // If we can represent this as a simple value, we have to build the actual
3612 // value to store, which requires expanding the byte present in memset to
3613 // a sensible representation for the alloca type. This is essentially
3614 // splatting the byte to a sufficiently wide integer, splatting it across
3615 // any desired vector width, and bitcasting to the final type.
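    // For illustration: a memset of the byte 0x2A over a promoted <4 x float>
    // alloca roughly becomes the i32 splat 0x2A2A2A2A, bitcast to float, then
    // broadcast to a <4 x float> value that is stored below.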
3616 Value *V;
3617
3618 if (VecTy) {
3619 // If this is a memset of a vectorized alloca, insert it.
3620 assert(ElementTy == ScalarTy);
3621
3622 unsigned BeginIndex = getIndex(NewBeginOffset);
3623 unsigned EndIndex = getIndex(NewEndOffset);
3624 assert(EndIndex > BeginIndex && "Empty vector!");
3625 unsigned NumElements = EndIndex - BeginIndex;
3626 assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
3627 "Too many elements!");
3628
3629 Value *Splat = getIntegerSplat(
3630 II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3631 Splat = convertValue(DL, IRB, Splat, ElementTy);
3632 if (NumElements > 1)
3633 Splat = getVectorSplat(Splat, NumElements);
3634
3635 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
3636 "oldload");
3637 V = insertVector(IRB, Old, Splat, BeginIndex, "vec");
3638 } else if (IntTy) {
3639 // If this is a memset on an alloca where we can widen stores, insert the
3640 // set integer.
3641 assert(!II.isVolatile());
3642
3643 uint64_t Size = NewEndOffset - NewBeginOffset;
3644 V = getIntegerSplat(II.getValue(), Size);
3645
3646 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3647                     EndOffset != NewAllocaEndOffset)) {
3648 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI,
3649 NewAI.getAlign(), "oldload");
3650 Old = convertValue(DL, IRB, Old, IntTy);
3651 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3652 V = insertInteger(DL, IRB, Old, V, Offset, "insert");
3653 } else {
3654 assert(V->getType() == IntTy &&
3655 "Wrong type for an alloca wide integer!");
3656 }
3657 V = convertValue(DL, IRB, V, NewAllocaTy);
3658 } else {
3659 // Established these invariants above.
3660 assert(NewBeginOffset == NewAllocaBeginOffset);
3661 assert(NewEndOffset == NewAllocaEndOffset);
3662
3663 V = getIntegerSplat(II.getValue(),
3664 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3665 if (VectorType *AllocaVecTy = dyn_cast<VectorType>(NewAllocaTy))
3666 V = getVectorSplat(
3667 V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());
3668
3669 V = convertValue(DL, IRB, V, NewAllocaTy);
3670 }
3671
3672 Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3673 StoreInst *New =
3674 IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
3675 New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3676 LLVMContext::MD_access_group});
3677 if (AATags)
3678 New->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3679 V->getType(), DL));
3680
3681 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3682 New, New->getPointerOperand(), V, DL);
3683
3684 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3685 return !II.isVolatile();
3686 }
3687
3688 bool visitMemTransferInst(MemTransferInst &II) {
3689 // Rewriting of memory transfer instructions can be a bit tricky. We break
3690 // them into two categories: split intrinsics and unsplit intrinsics.
3691
3692 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3693
3694 AAMDNodes AATags = II.getAAMetadata();
3695
3696 bool IsDest = &II.getRawDestUse() == OldUse;
3697 assert((IsDest && II.getRawDest() == OldPtr) ||
3698 (!IsDest && II.getRawSource() == OldPtr));
3699
3700 Align SliceAlign = getSliceAlign();
3701 // For unsplit intrinsics, we simply modify the source and destination
3702 // pointers in place. This isn't just an optimization, it is a matter of
3703 // correctness. With unsplit intrinsics we may be dealing with transfers
3704 // within a single alloca before SROA ran, or with transfers that have
3705 // a variable length. We may also be dealing with memmove instead of
3706     // memcpy, and so simply updating the pointers is all that is necessary for us to
3707 // update both source and dest of a single call.
3708 if (!IsSplittable) {
3709 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3710 if (IsDest) {
3711 // Update the address component of linked dbg.assigns.
3712 for (DbgVariableRecord *DbgAssign : at::getDVRAssignmentMarkers(&II)) {
3713 if (llvm::is_contained(DbgAssign->location_ops(), II.getDest()) ||
3714 DbgAssign->getAddress() == II.getDest())
3715 DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
3716 }
3717 II.setDest(AdjustedPtr);
3718 II.setDestAlignment(SliceAlign);
3719 } else {
3720 II.setSource(AdjustedPtr);
3721 II.setSourceAlignment(SliceAlign);
3722 }
3723
3724 LLVM_DEBUG(dbgs() << " to: " << II << "\n");
3725 deleteIfTriviallyDead(OldPtr);
3726 return false;
3727 }
3728 // For split transfer intrinsics we have an incredibly useful assurance:
3729 // the source and destination do not reside within the same alloca, and at
3730 // least one of them does not escape. This means that we can replace
3731 // memmove with memcpy, and we don't need to worry about all manner of
3732 // downsides to splitting and transforming the operations.
3733
3734 // If this doesn't map cleanly onto the alloca type, and that type isn't
3735 // a single value type, just emit a memcpy.
3736 bool EmitMemCpy =
3737 !VecTy && !IntTy &&
3738 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3739 SliceSize != DL.getTypeStoreSize(NewAllocaTy).getFixedValue() ||
3740 !DL.typeSizeEqualsStoreSize(NewAllocaTy) ||
3741 !NewAllocaTy->isSingleValueType());
3742
3743 // If we're just going to emit a memcpy, the alloca hasn't changed, and the
3744 // size hasn't been shrunk based on analysis of the viable range, this is
3745 // a no-op.
3746 if (EmitMemCpy && &OldAI == &NewAI) {
3747 // Ensure the start lines up.
3748 assert(NewBeginOffset == BeginOffset);
3749
3750 // Rewrite the size as needed.
3751 if (NewEndOffset != EndOffset)
3752 II.setLength(NewEndOffset - NewBeginOffset);
3753 return false;
3754 }
3755 // Record this instruction for deletion.
3756 Pass.DeadInsts.push_back(&II);
3757
3758 // Strip all inbounds GEPs and pointer casts to try to dig out any root
3759 // alloca that should be re-examined after rewriting this instruction.
3760 Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
3761     if (AllocaInst *AI =
3762             dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
3763       assert(AI != &OldAI && AI != &NewAI &&
3764 "Splittable transfers cannot reach the same alloca on both ends.");
3765 Pass.Worklist.insert(AI);
3766 }
3767
3768 Type *OtherPtrTy = OtherPtr->getType();
3769 unsigned OtherAS = OtherPtrTy->getPointerAddressSpace();
3770
3771 // Compute the relative offset for the other pointer within the transfer.
3772 unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
3773 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3774 Align OtherAlign =
3775 (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
3776 OtherAlign =
3777 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3778
3779 if (EmitMemCpy) {
3780 // Compute the other pointer, folding as much as possible to produce
3781 // a single, simple GEP in most cases.
3782 OtherPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3783 OtherPtr->getName() + ".");
3784
3785 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3786 Type *SizeTy = II.getLength()->getType();
3787 Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3788
3789 Value *DestPtr, *SrcPtr;
3790 MaybeAlign DestAlign, SrcAlign;
3791 // Note: IsDest is true iff we're copying into the new alloca slice
3792 if (IsDest) {
3793 DestPtr = OurPtr;
3794 DestAlign = SliceAlign;
3795 SrcPtr = OtherPtr;
3796 SrcAlign = OtherAlign;
3797 } else {
3798 DestPtr = OtherPtr;
3799 DestAlign = OtherAlign;
3800 SrcPtr = OurPtr;
3801 SrcAlign = SliceAlign;
3802 }
3803 CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3804 Size, II.isVolatile());
3805 if (AATags)
3806 New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
3807
3808 APInt Offset(DL.getIndexTypeSizeInBits(DestPtr->getType()), 0);
3809 if (IsDest) {
3810 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
3811 &II, New, DestPtr, nullptr, DL);
3812     } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3813                    OtherPtr->stripAndAccumulateConstantOffsets(
3814                        DL, Offset, /*AllowNonInbounds*/ true))) {
3815 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
3816 SliceSize * 8, &II, New, DestPtr, nullptr, DL);
3817 }
3818 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3819 return false;
3820 }
3821
3822 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3823 NewEndOffset == NewAllocaEndOffset;
3824 uint64_t Size = NewEndOffset - NewBeginOffset;
3825 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3826 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3827 unsigned NumElements = EndIndex - BeginIndex;
3828 IntegerType *SubIntTy =
3829 IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;
3830
3831 // Reset the other pointer type to match the register type we're going to
3832 // use, but using the address space of the original other pointer.
3833 Type *OtherTy;
3834 if (VecTy && !IsWholeAlloca) {
3835 if (NumElements == 1)
3836 OtherTy = VecTy->getElementType();
3837 else
3838 OtherTy = FixedVectorType::get(VecTy->getElementType(), NumElements);
3839 } else if (IntTy && !IsWholeAlloca) {
3840 OtherTy = SubIntTy;
3841 } else {
3842 OtherTy = NewAllocaTy;
3843 }
3844
3845 Value *AdjPtr = getAdjustedPtr(IRB, DL, OtherPtr, OtherOffset, OtherPtrTy,
3846 OtherPtr->getName() + ".");
3847 MaybeAlign SrcAlign = OtherAlign;
3848 MaybeAlign DstAlign = SliceAlign;
3849 if (!IsDest)
3850 std::swap(SrcAlign, DstAlign);
3851
3852 Value *SrcPtr;
3853 Value *DstPtr;
3854
3855 if (IsDest) {
3856 DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
3857 SrcPtr = AdjPtr;
3858 } else {
3859 DstPtr = AdjPtr;
3860 SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
3861 }
3862
3863 Value *Src;
3864 if (VecTy && !IsWholeAlloca && !IsDest) {
3865 Src =
3866 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3867 Src = extractVector(IRB, Src, BeginIndex, EndIndex, "vec");
3868 } else if (IntTy && !IsWholeAlloca && !IsDest) {
3869 Src =
3870 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
3871 Src = convertValue(DL, IRB, Src, IntTy);
3872 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3873 Src = extractInteger(DL, IRB, Src, SubIntTy, Offset, "extract");
3874 } else {
3875 LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3876 II.isVolatile(), "copyload");
3877 Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3878 LLVMContext::MD_access_group});
3879 if (AATags)
3880 Load->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3881 Load->getType(), DL));
3882 Src = Load;
3883 }
3884
3885 if (VecTy && !IsWholeAlloca && IsDest) {
3886 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
3887 "oldload");
3888 Src = insertVector(IRB, Old, Src, BeginIndex, "vec");
3889 } else if (IntTy && !IsWholeAlloca && IsDest) {
3890 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
3891 "oldload");
3892 Old = convertValue(DL, IRB, Old, IntTy);
3893 uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
3894 Src = insertInteger(DL, IRB, Old, Src, Offset, "insert");
3895 Src = convertValue(DL, IRB, Src, NewAllocaTy);
3896 }
3897
3898 StoreInst *Store = cast<StoreInst>(
3899 IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
3900 Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
3901 LLVMContext::MD_access_group});
3902 if (AATags)
3903 Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
3904 Src->getType(), DL));
3905
3906 APInt Offset(DL.getIndexTypeSizeInBits(DstPtr->getType()), 0);
3907 if (IsDest) {
3908
3909 migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
3910 Store, DstPtr, Src, DL);
3911     } else if (AllocaInst *Base = dyn_cast<AllocaInst>(
3912                    OtherPtr->stripAndAccumulateConstantOffsets(
3913                        DL, Offset, /*AllowNonInbounds*/ true))) {
3914 migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
3915 &II, Store, DstPtr, Src, DL);
3916 }
3917
3918 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
3919 return !II.isVolatile();
3920 }
3921
3922 bool visitIntrinsicInst(IntrinsicInst &II) {
3923 assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
3924 "Unexpected intrinsic!");
3925 LLVM_DEBUG(dbgs() << " original: " << II << "\n");
3926
3927 // Record this instruction for deletion.
3928 Pass.DeadInsts.push_back(&II);
3929
3930 if (II.isDroppable()) {
3931 assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
3932 // TODO For now we forget assumed information, this can be improved.
3933 OldPtr->dropDroppableUsesIn(II);
3934 return true;
3935 }
3936
3937 assert(II.getArgOperand(0) == OldPtr);
3938 Type *PointerTy = IRB.getPtrTy(OldPtr->getType()->getPointerAddressSpace());
3939 Value *Ptr = getNewAllocaSlicePtr(IRB, PointerTy);
3940 Value *New;
3941 if (II.getIntrinsicID() == Intrinsic::lifetime_start)
3942 New = IRB.CreateLifetimeStart(Ptr);
3943 else
3944 New = IRB.CreateLifetimeEnd(Ptr);
3945
3946 (void)New;
3947 LLVM_DEBUG(dbgs() << " to: " << *New << "\n");
3948
3949 return true;
3950 }
3951
3952 void fixLoadStoreAlign(Instruction &Root) {
3953 // This algorithm implements the same visitor loop as
3954 // hasUnsafePHIOrSelectUse, and fixes the alignment of each load
3955 // or store found.
3956 SmallPtrSet<Instruction *, 4> Visited;
3957 SmallVector<Instruction *, 4> Uses;
3958 Visited.insert(&Root);
3959 Uses.push_back(&Root);
3960 do {
3961 Instruction *I = Uses.pop_back_val();
3962
3963 if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
3964 LI->setAlignment(std::min(LI->getAlign(), getSliceAlign()));
3965 continue;
3966 }
3967 if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
3968 SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
3969 continue;
3970 }
3971
3972       assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
3973              isa<PHINode>(I) || isa<SelectInst>(I) ||
3974              isa<GetElementPtrInst>(I));
3975       for (User *U : I->users())
3976 if (Visited.insert(cast<Instruction>(U)).second)
3977 Uses.push_back(cast<Instruction>(U));
3978 } while (!Uses.empty());
3979 }
3980
3981 bool visitPHINode(PHINode &PN) {
3982 LLVM_DEBUG(dbgs() << " original: " << PN << "\n");
3983 assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
3984 assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");
3985
3986 // We would like to compute a new pointer in only one place, but have it be
3987 // as local as possible to the PHI. To do that, we re-use the location of
3988 // the old pointer, which necessarily must be in the right position to
3989 // dominate the PHI.
3990 IRBuilderBase::InsertPointGuard Guard(IRB);
3991 if (isa<PHINode>(OldPtr))
3992 IRB.SetInsertPoint(OldPtr->getParent(),
3993 OldPtr->getParent()->getFirstInsertionPt());
3994 else
3995 IRB.SetInsertPoint(OldPtr);
3996 IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());
3997
3998 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
3999 // Replace the operands which were using the old pointer.
4000 std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);
4001
4002 LLVM_DEBUG(dbgs() << " to: " << PN << "\n");
4003 deleteIfTriviallyDead(OldPtr);
4004
4005 // Fix the alignment of any loads or stores using this PHI node.
4006 fixLoadStoreAlign(PN);
4007
4008 // PHIs can't be promoted on their own, but often can be speculated. We
4009 // check the speculation outside of the rewriter so that we see the
4010 // fully-rewritten alloca.
4011 PHIUsers.insert(&PN);
4012 return true;
4013 }
4014
4015 bool visitSelectInst(SelectInst &SI) {
4016 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
4017 assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
4018 "Pointer isn't an operand!");
4019 assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
4020 assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");
4021
4022 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
4023 // Replace the operands which were using the old pointer.
4024 if (SI.getOperand(1) == OldPtr)
4025 SI.setOperand(1, NewPtr);
4026 if (SI.getOperand(2) == OldPtr)
4027 SI.setOperand(2, NewPtr);
4028
4029 LLVM_DEBUG(dbgs() << " to: " << SI << "\n");
4030 deleteIfTriviallyDead(OldPtr);
4031
4032 // Fix the alignment of any loads or stores using this select.
4033 fixLoadStoreAlign(SI);
4034
4035 // Selects can't be promoted on their own, but often can be speculated. We
4036 // check the speculation outside of the rewriter so that we see the
4037 // fully-rewritten alloca.
4038 SelectUsers.insert(&SI);
4039 return true;
4040 }
4041};
4042
4043/// Visitor to rewrite aggregate loads and stores as scalar.
4044///
4045/// This pass aggressively rewrites all aggregate loads and stores on
4046/// a particular pointer (or any pointer derived from it which we can identify)
4047/// with scalar loads and stores.
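/// For illustration (hypothetical IR), a load such as
///   %agg = load { i32, float }, ptr %p
/// is rewritten roughly as
///   %gep0 = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 0
///   %elt0 = load i32, ptr %gep0
///   %gep1 = getelementptr inbounds { i32, float }, ptr %p, i32 0, i32 1
///   %elt1 = load float, ptr %gep1
///   %tmp  = insertvalue { i32, float } poison, i32 %elt0, 0
///   %agg  = insertvalue { i32, float } %tmp, float %elt1, 1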
4048class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
4049 // Befriend the base class so it can delegate to private visit methods.
4050 friend class InstVisitor<AggLoadStoreRewriter, bool>;
4051
4052 /// Queue of pointer uses to analyze and potentially rewrite.
4053   SmallVector<Use *, 8> Queue;
4054
4055 /// Set to prevent us from cycling with phi nodes and loops.
4056 SmallPtrSet<User *, 8> Visited;
4057
4058 /// The current pointer use being rewritten. This is used to dig up the used
4059 /// value (as opposed to the user).
4060 Use *U = nullptr;
4061
4062 /// Used to calculate offsets, and hence alignment, of subobjects.
4063 const DataLayout &DL;
4064
4065 IRBuilderTy &IRB;
4066
4067public:
4068 AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
4069 : DL(DL), IRB(IRB) {}
4070
4071 /// Rewrite loads and stores through a pointer and all pointers derived from
4072 /// it.
4073 bool rewrite(Instruction &I) {
4074 LLVM_DEBUG(dbgs() << " Rewriting FCA loads and stores...\n");
4075 enqueueUsers(I);
4076 bool Changed = false;
4077 while (!Queue.empty()) {
4078 U = Queue.pop_back_val();
4079 Changed |= visit(cast<Instruction>(U->getUser()));
4080 }
4081 return Changed;
4082 }
4083
4084private:
4085 /// Enqueue all the users of the given instruction for further processing.
4086 /// This uses a set to de-duplicate users.
4087 void enqueueUsers(Instruction &I) {
4088 for (Use &U : I.uses())
4089 if (Visited.insert(U.getUser()).second)
4090 Queue.push_back(&U);
4091 }
4092
4093 // Conservative default is to not rewrite anything.
4094 bool visitInstruction(Instruction &I) { return false; }
4095
4096 /// Generic recursive split emission class.
4097 template <typename Derived> class OpSplitter {
4098 protected:
4099 /// The builder used to form new instructions.
4100 IRBuilderTy &IRB;
4101
4102     /// The indices to be used with insert- or extractvalue to select the
4103 /// appropriate value within the aggregate.
4104 SmallVector<unsigned, 4> Indices;
4105
4106 /// The indices to a GEP instruction which will move Ptr to the correct slot
4107 /// within the aggregate.
4108 SmallVector<Value *, 4> GEPIndices;
4109
4110 /// The base pointer of the original op, used as a base for GEPing the
4111 /// split operations.
4112 Value *Ptr;
4113
4114 /// The base pointee type being GEPed into.
4115 Type *BaseTy;
4116
4117 /// Known alignment of the base pointer.
4118 Align BaseAlign;
4119
4120     /// To calculate the offset of each component so we can correctly deduce
4121 /// alignments.
4122 const DataLayout &DL;
4123
4124 /// Initialize the splitter with an insertion point, Ptr and start with a
4125 /// single zero GEP index.
4126 OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4127 Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
4128 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4129 BaseAlign(BaseAlign), DL(DL) {
4130 IRB.SetInsertPoint(InsertionPoint);
4131 }
4132
4133 public:
4134 /// Generic recursive split emission routine.
4135 ///
4136 /// This method recursively splits an aggregate op (load or store) into
4137 /// scalar or vector ops. It splits recursively until it hits a single value
4138 /// and emits that single value operation via the template argument.
4139 ///
4140 /// The logic of this routine relies on GEPs and insertvalue and
4141 /// extractvalue all operating with the same fundamental index list, merely
4142 /// formatted differently (GEPs need actual values).
4143 ///
4144 /// \param Ty The type being split recursively into smaller ops.
4145 /// \param Agg The aggregate value being built up or stored, depending on
4146 /// whether this is splitting a load or a store respectively.
4147 void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
4148 if (Ty->isSingleValueType()) {
4149 unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4150 return static_cast<Derived *>(this)->emitFunc(
4151 Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
4152 }
4153
4154 if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
4155 unsigned OldSize = Indices.size();
4156 (void)OldSize;
4157 for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
4158 ++Idx) {
4159 assert(Indices.size() == OldSize && "Did not return to the old size");
4160 Indices.push_back(Idx);
4161 GEPIndices.push_back(IRB.getInt32(Idx));
4162 emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
4163 GEPIndices.pop_back();
4164 Indices.pop_back();
4165 }
4166 return;
4167 }
4168
4169 if (StructType *STy = dyn_cast<StructType>(Ty)) {
4170 unsigned OldSize = Indices.size();
4171 (void)OldSize;
4172 for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
4173 ++Idx) {
4174 assert(Indices.size() == OldSize && "Did not return to the old size");
4175 Indices.push_back(Idx);
4176 GEPIndices.push_back(IRB.getInt32(Idx));
4177 emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
4178 GEPIndices.pop_back();
4179 Indices.pop_back();
4180 }
4181 return;
4182 }
4183
4184 llvm_unreachable("Only arrays and structs are aggregate loadable types");
4185 }
4186 };
4187
4188 struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
4189 AAMDNodes AATags;
4190 // A vector to hold the split components that we want to emit
4191 // separate fake uses for.
4192 SmallVector<Value *, 4> Components;
4193 // A vector to hold all the fake uses of the struct that we are splitting.
4194 // Usually there should only be one, but we are handling the general case.
4195     SmallVector<Instruction *, 1> FakeUses;
4196
4197 LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4198 AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
4199 IRBuilderTy &IRB)
4200 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,
4201 IRB),
4202 AATags(AATags) {}
4203
4204 /// Emit a leaf load of a single value. This is called at the leaves of the
4205 /// recursive emission to actually load values.
4206 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4207       assert(Ty->isSingleValueType());
4208       // Load the single value and insert it using the indices.
4209 Value *GEP =
4210 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4211 LoadInst *Load =
4212 IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
4213
4214 APInt Offset(
4215 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
4216 if (AATags &&
4217 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))
4218 Load->setAAMetadata(
4219 AATags.adjustForAccess(Offset.getZExtValue(), Load->getType(), DL));
4220 // Record the load so we can generate a fake use for this aggregate
4221 // component.
4222 Components.push_back(Load);
4223
4224 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
4225 LLVM_DEBUG(dbgs() << " to: " << *Load << "\n");
4226 }
4227
4228 // Stash the fake uses that use the value generated by this instruction.
4229 void recordFakeUses(LoadInst &LI) {
4230 for (Use &U : LI.uses())
4231 if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
4232 if (II->getIntrinsicID() == Intrinsic::fake_use)
4233 FakeUses.push_back(II);
4234 }
4235
4236 // Replace all fake uses of the aggregate with a series of fake uses, one
4237 // for each split component.
4238 void emitFakeUses() {
4239 for (Instruction *I : FakeUses) {
4240 IRB.SetInsertPoint(I);
4241 for (auto *V : Components)
4242 IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
4243 I->eraseFromParent();
4244 }
4245 }
4246 };
4247
4248 bool visitLoadInst(LoadInst &LI) {
4249 assert(LI.getPointerOperand() == *U);
4250 if (!LI.isSimple() || LI.getType()->isSingleValueType())
4251 return false;
4252
4253 // We have an aggregate being loaded, split it apart.
4254 LLVM_DEBUG(dbgs() << " original: " << LI << "\n");
4255 LoadOpSplitter Splitter(&LI, *U, LI.getType(), LI.getAAMetadata(),
4256 getAdjustedAlignment(&LI, 0), DL, IRB);
4257 Splitter.recordFakeUses(LI);
4258     Value *V = PoisonValue::get(LI.getType());
4259     Splitter.emitSplitOps(LI.getType(), V, LI.getName() + ".fca");
4260 Splitter.emitFakeUses();
4261 Visited.erase(&LI);
4262 LI.replaceAllUsesWith(V);
4263 LI.eraseFromParent();
4264 return true;
4265 }
4266
4267 struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
4268 StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
4269 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4270 const DataLayout &DL, IRBuilderTy &IRB)
4271 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4272 DL, IRB),
4273 AATags(AATags), AggStore(AggStore) {}
4274 AAMDNodes AATags;
4275 StoreInst *AggStore;
4276 /// Emit a leaf store of a single value. This is called at the leaves of the
4277 /// recursive emission to actually produce stores.
4278 void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
4279       assert(Ty->isSingleValueType());
4280       // Extract the single value and store it using the indices.
4281 //
4282 // The gep and extractvalue values are factored out of the CreateStore
4283 // call to make the output independent of the argument evaluation order.
4284 Value *ExtractValue =
4285 IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
4286 Value *InBoundsGEP =
4287 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
4288 StoreInst *Store =
4289 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4290
4291 APInt Offset(
4292 DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
4293 GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset);
4294 if (AATags) {
4295 Store->setAAMetadata(AATags.adjustForAccess(
4296 Offset.getZExtValue(), ExtractValue->getType(), DL));
4297 }
4298
4299 // migrateDebugInfo requires the base Alloca. Walk to it from this gep.
4300 // If we cannot (because there's an intervening non-const or unbounded
4301 // gep) then we wouldn't expect to see dbg.assign intrinsics linked to
4302 // this instruction.
4303       Value *Base = AggStore->getPointerOperand()->stripInBoundsOffsets();
4304       if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
4305 uint64_t SizeInBits =
4306 DL.getTypeSizeInBits(Store->getValueOperand()->getType());
4307 migrateDebugInfo(OldAI, /*IsSplit*/ true, Offset.getZExtValue() * 8,
4308 SizeInBits, AggStore, Store,
4309 Store->getPointerOperand(), Store->getValueOperand(),
4310 DL);
4311 } else {
4313 "AT: unexpected debug.assign linked to store through "
4314 "unbounded GEP");
4315 }
4316 LLVM_DEBUG(dbgs() << " to: " << *Store << "\n");
4317 }
4318 };
4319
4320 bool visitStoreInst(StoreInst &SI) {
4321 if (!SI.isSimple() || SI.getPointerOperand() != *U)
4322 return false;
4323 Value *V = SI.getValueOperand();
4324 if (V->getType()->isSingleValueType())
4325 return false;
4326
4327 // We have an aggregate being stored, split it apart.
4328 LLVM_DEBUG(dbgs() << " original: " << SI << "\n");
4329 StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
4330 getAdjustedAlignment(&SI, 0), DL, IRB);
4331 Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
4332 Visited.erase(&SI);
4333 // The stores replacing SI each have markers describing fragments of the
4334 // assignment so delete the assignment markers linked to SI.
4335     at::deleteAssignmentMarkers(&SI);
4336     SI.eraseFromParent();
4337 return true;
4338 }
4339
4340 bool visitBitCastInst(BitCastInst &BC) {
4341 enqueueUsers(BC);
4342 return false;
4343 }
4344
4345 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4346 enqueueUsers(ASC);
4347 return false;
4348 }
4349
4350 // Unfold gep (select cond, ptr1, ptr2), idx
4351 // => select cond, gep(ptr1, idx), gep(ptr2, idx)
4352 // and gep ptr, (select cond, idx1, idx2)
4353 // => select cond, gep(ptr, idx1), gep(ptr, idx2)
4354 // We also allow for i1 zext indices, which are equivalent to selects.
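  // For illustration (hypothetical IR):
  //   %idx = select i1 %c, i64 1, i64 2
  //   %gep = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 %idx
  // becomes roughly
  //   %gep.t = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 1
  //   %gep.f = getelementptr inbounds [4 x i32], ptr %a, i64 0, i64 2
  //   %gep   = select i1 %c, ptr %gep.t, ptr %gep.f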
4355 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4356 // Check whether the GEP has exactly one select operand and all indices
4357 // will become constant after the transform.
4358     Instruction *Sel = nullptr;
4359     for (Value *Op : GEPI.indices()) {
4360 if (auto *SI = dyn_cast<SelectInst>(Op)) {
4361 if (Sel)
4362 return false;
4363
4364 Sel = SI;
4365 if (!isa<ConstantInt>(SI->getTrueValue()) ||
4366 !isa<ConstantInt>(SI->getFalseValue()))
4367 return false;
4368 continue;
4369 }
4370 if (auto *ZI = dyn_cast<ZExtInst>(Op)) {
4371 if (Sel)
4372 return false;
4373 Sel = ZI;
4374 if (!ZI->getSrcTy()->isIntegerTy(1))
4375 return false;
4376 continue;
4377 }
4378
4379 if (!isa<ConstantInt>(Op))
4380 return false;
4381 }
4382
4383 if (!Sel)
4384 return false;
4385
4386 LLVM_DEBUG(dbgs() << " Rewriting gep(select) -> select(gep):\n";
4387 dbgs() << " original: " << *Sel << "\n";
4388 dbgs() << " " << GEPI << "\n";);
4389
4390 auto GetNewOps = [&](Value *SelOp) {
4391 SmallVector<Value *> NewOps;
4392 for (Value *Op : GEPI.operands())
4393 if (Op == Sel)
4394 NewOps.push_back(SelOp);
4395 else
4396 NewOps.push_back(Op);
4397 return NewOps;
4398 };
4399
4400 Value *Cond, *True, *False;
4401 Instruction *MDFrom = nullptr;
4402 if (auto *SI = dyn_cast<SelectInst>(Sel)) {
4403 Cond = SI->getCondition();
4404 True = SI->getTrueValue();
4405 False = SI->getFalseValue();
4407 MDFrom = SI;
4408 } else {
4409 Cond = Sel->getOperand(0);
4410 True = ConstantInt::get(Sel->getType(), 1);
4411 False = ConstantInt::get(Sel->getType(), 0);
4412 }
4413 SmallVector<Value *> TrueOps = GetNewOps(True);
4414 SmallVector<Value *> FalseOps = GetNewOps(False);
4415
4416 IRB.SetInsertPoint(&GEPI);
4417 GEPNoWrapFlags NW = GEPI.getNoWrapFlags();
4418
4419 Type *Ty = GEPI.getSourceElementType();
4420 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4421 True->getName() + ".sroa.gep", NW);
4422
4423 Value *NFalse =
4424 IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4425 False->getName() + ".sroa.gep", NW);
4426
4427 Value *NSel = MDFrom
4428 ? IRB.CreateSelect(Cond, NTrue, NFalse,
4429 Sel->getName() + ".sroa.sel", MDFrom)
4430 : IRB.CreateSelectWithUnknownProfile(
4431 Cond, NTrue, NFalse, DEBUG_TYPE,
4432 Sel->getName() + ".sroa.sel");
4433 Visited.erase(&GEPI);
4434 GEPI.replaceAllUsesWith(NSel);
4435 GEPI.eraseFromParent();
4436 Instruction *NSelI = cast<Instruction>(NSel);
4437 Visited.insert(NSelI);
4438 enqueueUsers(*NSelI);
4439
4440 LLVM_DEBUG(dbgs() << " to: " << *NTrue << "\n";
4441 dbgs() << " " << *NFalse << "\n";
4442 dbgs() << " " << *NSel << "\n";);
4443
4444 return true;
4445 }
4446
4447 // Unfold gep (phi ptr1, ptr2), idx
4448 // => phi ((gep ptr1, idx), (gep ptr2, idx))
4449 // and gep ptr, (phi idx1, idx2)
4450 // => phi ((gep ptr, idx1), (gep ptr, idx2))
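  // As an illustrative sketch (value names are hypothetical, not taken from a
  // test), the first form rewrites
  //   %p   = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
  //   %gep = getelementptr i32, ptr %p, i64 1
  // into
  //   %gep.a = getelementptr i32, ptr %a, i64 1
  //   %gep.b = getelementptr i32, ptr %b, i64 1
  //   %p.sroa.phi = phi ptr [ %gep.a, %bb1 ], [ %gep.b, %bb2 ]
  // The new GEPs land in the entry block, which is why only arguments,
  // constants, and static allocas are accepted as incoming pointers.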
4451 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4452 // To prevent infinitely expanding recursive phis, bail if the GEP pointer
4453 // operand (looking through the phi if it is the phi we want to unfold) is
4454 // an instruction besides a static alloca.
4455 PHINode *Phi = dyn_cast<PHINode>(GEPI.getPointerOperand());
4456 auto IsInvalidPointerOperand = [](Value *V) {
4457 if (!isa<Instruction>(V))
4458 return false;
4459 if (auto *AI = dyn_cast<AllocaInst>(V))
4460 return !AI->isStaticAlloca();
4461 return true;
4462 };
4463 if (Phi) {
4464 if (any_of(Phi->operands(), IsInvalidPointerOperand))
4465 return false;
4466 } else {
4467 if (IsInvalidPointerOperand(GEPI.getPointerOperand()))
4468 return false;
4469 }
4470 // Check whether the GEP has exactly one phi operand (including the pointer
4471 // operand) and all indices will become constant after the transform.
4472 for (Value *Op : GEPI.indices()) {
4473 if (auto *SI = dyn_cast<PHINode>(Op)) {
4474 if (Phi)
4475 return false;
4476
4477 Phi = SI;
4478 if (!all_of(Phi->incoming_values(),
4479 [](Value *V) { return isa<ConstantInt>(V); }))
4480 return false;
4481 continue;
4482 }
4483
4484 if (!isa<ConstantInt>(Op))
4485 return false;
4486 }
4487
4488 if (!Phi)
4489 return false;
4490
4491 LLVM_DEBUG(dbgs() << " Rewriting gep(phi) -> phi(gep):\n";
4492 dbgs() << " original: " << *Phi << "\n";
4493 dbgs() << " " << GEPI << "\n";);
4494
4495 auto GetNewOps = [&](Value *PhiOp) {
4496 SmallVector<Value *> NewOps;
4497 for (Value *Op : GEPI.operands())
4498 if (Op == Phi)
4499 NewOps.push_back(PhiOp);
4500 else
4501 NewOps.push_back(Op);
4502 return NewOps;
4503 };
4504
4505 IRB.SetInsertPoint(Phi);
4506 PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4507 Phi->getName() + ".sroa.phi");
4508
4509 Type *SourceTy = GEPI.getSourceElementType();
4510 // We only handle arguments, constants, and static allocas here, so we can
4511 // insert GEPs at the end of the entry block.
4512 IRB.SetInsertPoint(GEPI.getFunction()->getEntryBlock().getTerminator());
4513 for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4514 Value *Op = Phi->getIncomingValue(I);
4515 BasicBlock *BB = Phi->getIncomingBlock(I);
4516 Value *NewGEP;
4517 if (int NI = NewPhi->getBasicBlockIndex(BB); NI >= 0) {
4518 NewGEP = NewPhi->getIncomingValue(NI);
4519 } else {
4520 SmallVector<Value *> NewOps = GetNewOps(Op);
4521 NewGEP =
4522 IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4523 Phi->getName() + ".sroa.gep", GEPI.getNoWrapFlags());
4524 }
4525 NewPhi->addIncoming(NewGEP, BB);
4526 }
4527
4528 Visited.erase(&GEPI);
4529 GEPI.replaceAllUsesWith(NewPhi);
4530 GEPI.eraseFromParent();
4531 Visited.insert(NewPhi);
4532 enqueueUsers(*NewPhi);
4533
4534 LLVM_DEBUG(dbgs() << " to: ";
4535 for (Value *In
4536 : NewPhi->incoming_values()) dbgs()
4537 << "\n " << *In;
4538 dbgs() << "\n " << *NewPhi << '\n');
4539
4540 return true;
4541 }
4542
4543 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4544 if (unfoldGEPSelect(GEPI))
4545 return true;
4546
4547 if (unfoldGEPPhi(GEPI))
4548 return true;
4549
4550 enqueueUsers(GEPI);
4551 return false;
4552 }
4553
4554 bool visitPHINode(PHINode &PN) {
4555 enqueueUsers(PN);
4556 return false;
4557 }
4558
4559 bool visitSelectInst(SelectInst &SI) {
4560 enqueueUsers(SI);
4561 return false;
4562 }
4563};
4564
4565} // end anonymous namespace
4566
4567/// Strip aggregate type wrapping.
4568///
4569/// This removes no-op aggregate types wrapping an underlying type. It will
4570/// strip as many layers of types as it can without changing either the type
4571/// size or the allocated size.
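/// For example (illustrative), both { { i32 } } and [1 x i32] wrap a single
/// i32 without adding any bytes, so they are stripped down to i32, whereas
/// { i32, i32 } is returned unchanged.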
4572 static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
4573 if (Ty->isSingleValueType())
4574 return Ty;
4575
4576 uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4577 uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
4578
4579 Type *InnerTy;
4580 if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4581 InnerTy = ArrTy->getElementType();
4582 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
4583 const StructLayout *SL = DL.getStructLayout(STy);
4584 unsigned Index = SL->getElementContainingOffset(0);
4585 InnerTy = STy->getElementType(Index);
4586 } else {
4587 return Ty;
4588 }
4589
4590 if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4591 TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4592 return Ty;
4593
4594 return stripAggregateTypeWrapping(DL, InnerTy);
4595}
4596
4597/// Try to find a partition of the aggregate type passed in for a given
4598/// offset and size.
4599///
4600/// This recurses through the aggregate type and tries to compute a subtype
4601/// based on the offset and size. When the offset and size span a sub-section
4602/// of an array, it will even compute a new array type for that sub-section,
4603/// and the same for structs.
4604///
4605/// Note that this routine is very strict and tries to find a partition of the
4606/// type which produces the *exact* right offset and size. It is not forgiving
4607/// when the size or offset cause either end of type-based partition to be off.
4608/// Also, this is a best-effort routine. It is reasonable to give up and not
4609/// return a type if necessary.
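/// For example (illustrative), given { i32, [4 x i8], float } with Offset = 4
/// and Size = 4 this returns [4 x i8]; with Offset = 4 and Size = 2 it
/// recurses into the array and returns [2 x i8]; with Offset = 5 and Size = 4
/// it returns nullptr because the range straddles two elements.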
4610 static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
4611 uint64_t Size) {
4612 if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4613 return stripAggregateTypeWrapping(DL, Ty);
4614 if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4615 (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4616 return nullptr;
4617
4618 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4619 Type *ElementTy;
4620 uint64_t TyNumElements;
4621 if (auto *AT = dyn_cast<ArrayType>(Ty)) {
4622 ElementTy = AT->getElementType();
4623 TyNumElements = AT->getNumElements();
4624 } else {
4625 // FIXME: This isn't right for vectors with non-byte-sized or
4626 // non-power-of-two sized elements.
4627 auto *VT = cast<FixedVectorType>(Ty);
4628 ElementTy = VT->getElementType();
4629 TyNumElements = VT->getNumElements();
4630 }
4631 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4632 uint64_t NumSkippedElements = Offset / ElementSize;
4633 if (NumSkippedElements >= TyNumElements)
4634 return nullptr;
4635 Offset -= NumSkippedElements * ElementSize;
4636
4637 // First check if we need to recurse.
4638 if (Offset > 0 || Size < ElementSize) {
4639 // Bail if the partition ends in a different array element.
4640 if ((Offset + Size) > ElementSize)
4641 return nullptr;
4642 // Recurse through the element type trying to peel off offset bytes.
4643 return getTypePartition(DL, ElementTy, Offset, Size);
4644 }
4645 assert(Offset == 0);
4646
4647 if (Size == ElementSize)
4648 return stripAggregateTypeWrapping(DL, ElementTy);
4649 assert(Size > ElementSize);
4650 uint64_t NumElements = Size / ElementSize;
4651 if (NumElements * ElementSize != Size)
4652 return nullptr;
4653 return ArrayType::get(ElementTy, NumElements);
4654 }
4655
4656 StructType *STy = dyn_cast<StructType>(Ty);
4657 if (!STy)
4658 return nullptr;
4659
4660 const StructLayout *SL = DL.getStructLayout(STy);
4661
4662 if (SL->getSizeInBits().isScalable())
4663 return nullptr;
4664
4665 if (Offset >= SL->getSizeInBytes())
4666 return nullptr;
4667 uint64_t EndOffset = Offset + Size;
4668 if (EndOffset > SL->getSizeInBytes())
4669 return nullptr;
4670
4671 unsigned Index = SL->getElementContainingOffset(Offset);
4672 Offset -= SL->getElementOffset(Index);
4673
4674 Type *ElementTy = STy->getElementType(Index);
4675 uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4676 if (Offset >= ElementSize)
4677 return nullptr; // The offset points into alignment padding.
4678
4679 // See if any partition must be contained by the element.
4680 if (Offset > 0 || Size < ElementSize) {
4681 if ((Offset + Size) > ElementSize)
4682 return nullptr;
4683 return getTypePartition(DL, ElementTy, Offset, Size);
4684 }
4685 assert(Offset == 0);
4686
4687 if (Size == ElementSize)
4688 return stripAggregateTypeWrapping(DL, ElementTy);
4689
4690 StructType::element_iterator EI = STy->element_begin() + Index,
4691 EE = STy->element_end();
4692 if (EndOffset < SL->getSizeInBytes()) {
4693 unsigned EndIndex = SL->getElementContainingOffset(EndOffset);
4694 if (Index == EndIndex)
4695 return nullptr; // Within a single element and its padding.
4696
4697 // Don't try to form "natural" types if the elements don't line up with the
4698 // expected size.
4699 // FIXME: We could potentially recurse down through the last element in the
4700 // sub-struct to find a natural end point.
4701 if (SL->getElementOffset(EndIndex) != EndOffset)
4702 return nullptr;
4703
4704 assert(Index < EndIndex);
4705 EE = STy->element_begin() + EndIndex;
4706 }
4707
4708 // Try to build up a sub-structure.
4709 StructType *SubTy =
4710 StructType::get(STy->getContext(), ArrayRef(EI, EE), STy->isPacked());
4711 const StructLayout *SubSL = DL.getStructLayout(SubTy);
4712 if (Size != SubSL->getSizeInBytes())
4713 return nullptr; // The sub-struct doesn't have quite the size needed.
4714
4715 return SubTy;
4716}
4717
4718/// Pre-split loads and stores to simplify rewriting.
4719///
4720/// We want to break up the splittable load+store pairs as much as
4721/// possible. This is important to do as a preprocessing step, as once we
4722/// start rewriting the accesses to partitions of the alloca we lose the
4723/// necessary information to correctly split apart paired loads and stores
4724/// which both point into this alloca. The case to consider is something like
4725/// the following:
4726///
4727/// %a = alloca [12 x i8]
4728/// %gep1 = getelementptr i8, ptr %a, i32 0
4729/// %gep2 = getelementptr i8, ptr %a, i32 4
4730/// %gep3 = getelementptr i8, ptr %a, i32 8
4731/// store float 0.0, ptr %gep1
4732/// store float 1.0, ptr %gep2
4733/// %v = load i64, ptr %gep1
4734/// store i64 %v, ptr %gep2
4735/// %f1 = load float, ptr %gep2
4736/// %f2 = load float, ptr %gep3
4737///
4738/// Here we want to form 3 partitions of the alloca, each 4 bytes large, and
4739/// promote everything so we recover the 2 SSA values that should have been
4740/// there all along.
4741///
4742/// \returns true if any changes are made.
4743bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4744 LLVM_DEBUG(dbgs() << "Pre-splitting loads and stores\n");
4745
4746 // Track the loads and stores which are candidates for pre-splitting here, in
4747 // the order they first appear during the partition scan. These give stable
4748 // iteration order and a basis for tracking which loads and stores we
4749 // actually split.
4750 SmallVector<LoadInst *, 4> Loads;
4751 SmallVector<StoreInst *, 4> Stores;
4752
4753 // We need to accumulate the splits required of each load or store where we
4754 // can find them via a direct lookup. This is important to cross-check loads
4755 // and stores against each other. We also track the slice so that we can kill
4756 // all the slices that end up split.
4757 struct SplitOffsets {
4758 Slice *S;
4759 std::vector<uint64_t> Splits;
4760 };
4761 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4762
4763 // Track loads out of this alloca which cannot, for any reason, be pre-split.
4764 // This is important as we also cannot pre-split stores of those loads!
4765 // FIXME: This is all pretty gross. It means that we can be more aggressive
4766 // in pre-splitting when the load feeding the store happens to come from
4767 // a separate alloca. Put another way, the effectiveness of SROA would be
4768 // decreased by a frontend which just concatenated all of its local allocas
4769 // into one big flat alloca. But defeating such patterns is exactly the job
4770 // SROA is tasked with! Sadly, to not have this discrepancy we would have
4771 // change store pre-splitting to actually force pre-splitting of the load
4772 // that feeds it *and all stores*. That makes pre-splitting much harder, but
4773 // maybe it would make it more principled?
4774 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4775
4776 LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4777 for (auto &P : AS.partitions()) {
4778 for (Slice &S : P) {
4779 Instruction *I = cast<Instruction>(S.getUse()->getUser());
4780 if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4781 // If this is a load we have to track that it can't participate in any
4782 // pre-splitting. If this is a store of a load we have to track that
4783 // that load also can't participate in any pre-splitting.
4784 if (auto *LI = dyn_cast<LoadInst>(I))
4785 UnsplittableLoads.insert(LI);
4786 else if (auto *SI = dyn_cast<StoreInst>(I))
4787 if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
4788 UnsplittableLoads.insert(LI);
4789 continue;
4790 }
4791 assert(P.endOffset() > S.beginOffset() &&
4792 "Empty or backwards partition!");
4793
4794 // Determine if this is a pre-splittable slice.
4795 if (auto *LI = dyn_cast<LoadInst>(I)) {
4796 assert(!LI->isVolatile() && "Cannot split volatile loads!");
4797
4798 // The load must be used exclusively to store into other pointers for
4799 // us to be able to arbitrarily pre-split it. The stores must also be
4800 // simple to avoid changing semantics.
4801 auto IsLoadSimplyStored = [](LoadInst *LI) {
4802 for (User *LU : LI->users()) {
4803 auto *SI = dyn_cast<StoreInst>(LU);
4804 if (!SI || !SI->isSimple())
4805 return false;
4806 }
4807 return true;
4808 };
4809 if (!IsLoadSimplyStored(LI)) {
4810 UnsplittableLoads.insert(LI);
4811 continue;
4812 }
4813
4814 Loads.push_back(LI);
4815 } else if (auto *SI = dyn_cast<StoreInst>(I)) {
4816 if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4817 // Skip stores *of* pointers. FIXME: This shouldn't even be possible!
4818 continue;
4819 auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
4820 if (!StoredLoad || !StoredLoad->isSimple())
4821 continue;
4822 assert(!SI->isVolatile() && "Cannot split volatile stores!");
4823
4824 Stores.push_back(SI);
4825 } else {
4826 // Other uses cannot be pre-split.
4827 continue;
4828 }
4829
4830 // Record the initial split.
4831 LLVM_DEBUG(dbgs() << " Candidate: " << *I << "\n");
4832 auto &Offsets = SplitOffsetsMap[I];
4833 assert(Offsets.Splits.empty() &&
4834 "Should not have splits the first time we see an instruction!");
4835 Offsets.S = &S;
4836 Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4837 }
4838
4839 // Now scan the already split slices, and add a split for any of them which
4840 // we're going to pre-split.
4841 for (Slice *S : P.splitSliceTails()) {
4842 auto SplitOffsetsMapI =
4843 SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
4844 if (SplitOffsetsMapI == SplitOffsetsMap.end())
4845 continue;
4846 auto &Offsets = SplitOffsetsMapI->second;
4847
4848 assert(Offsets.S == S && "Found a mismatched slice!");
4849 assert(!Offsets.Splits.empty() &&
4850 "Cannot have an empty set of splits on the second partition!");
4851 assert(Offsets.Splits.back() ==
4852 P.beginOffset() - Offsets.S->beginOffset() &&
4853 "Previous split does not end where this one begins!");
4854
4855 // Record each split. The last partition's end isn't needed as the size
4856 // of the slice dictates that.
4857 if (S->endOffset() > P.endOffset())
4858 Offsets.Splits.push_back(P.endOffset() - Offsets.S->beginOffset());
4859 }
4860 }
4861
4862 // We may have split loads where some of their stores are split stores. For
4863 // such loads and stores, we can only pre-split them if their splits exactly
4864 // match relative to their starting offset. We have to verify this prior to
4865 // any rewriting.
4866 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4867 // Lookup the load we are storing in our map of split
4868 // offsets.
4869 auto *LI = cast<LoadInst>(SI->getValueOperand());
4870 // If it was completely unsplittable, then we're done,
4871 // and this store can't be pre-split.
4872 if (UnsplittableLoads.count(LI))
4873 return true;
4874
4875 auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4876 if (LoadOffsetsI == SplitOffsetsMap.end())
4877 return false; // Unrelated loads are definitely safe.
4878 auto &LoadOffsets = LoadOffsetsI->second;
4879
4880 // Now lookup the store's offsets.
4881 auto &StoreOffsets = SplitOffsetsMap[SI];
4882
4883 // If the relative offsets of each split in the load and
4884 // store match exactly, then we can split them and we
4885 // don't need to remove them here.
4886 if (LoadOffsets.Splits == StoreOffsets.Splits)
4887 return false;
4888
4889 LLVM_DEBUG(dbgs() << " Mismatched splits for load and store:\n"
4890 << " " << *LI << "\n"
4891 << " " << *SI << "\n");
4892
4893 // We've found a store and load that we need to split
4894 // with mismatched relative splits. Just give up on them
4895 // and remove both instructions from our list of
4896 // candidates.
4897 UnsplittableLoads.insert(LI);
4898 return true;
4899 });
4900 // Now we have to go *back* through all the stores, because a later store may
4901 // have caused an earlier store's load to become unsplittable and if it is
4902 // unsplittable for the later store, then we can't rely on it being split in
4903 // the earlier store either.
4904 llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
4905 auto *LI = cast<LoadInst>(SI->getValueOperand());
4906 return UnsplittableLoads.count(LI);
4907 });
4908 // Once we've established all the loads that can't be split for some reason,
4909 // filter any that made it into our list out.
4910 llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
4911 return UnsplittableLoads.count(LI);
4912 });
4913
4914 // If no loads or stores are left, there is no pre-splitting to be done for
4915 // this alloca.
4916 if (Loads.empty() && Stores.empty())
4917 return false;
4918
4919 // From here on, we can't fail and will be building new accesses, so rig up
4920 // an IR builder.
4921 IRBuilderTy IRB(&AI);
4922
4923 // Collect the new slices which we will merge into the alloca slices.
4924 SmallVector<Slice, 4> NewSlices;
4925
4926 // Track any allocas we end up splitting loads and stores for so we iterate
4927 // on them.
4928 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4929
4930 // At this point, we have collected all of the loads and stores we can
4931 // pre-split, and the specific splits needed for them. We actually do the
4932 // splitting in a specific order to handle the case when one of the loads is
4933 // the value operand to one of the stores.
4934 //
4935 // First, we rewrite all of the split loads, and just accumulate each split
4936 // load in a parallel structure. We also build the slices for them and append
4937 // them to the alloca slices.
4938 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4939 std::vector<LoadInst *> SplitLoads;
4940 const DataLayout &DL = AI.getDataLayout();
4941 for (LoadInst *LI : Loads) {
4942 SplitLoads.clear();
4943
4944 auto &Offsets = SplitOffsetsMap[LI];
4945 unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4946 assert(LI->getType()->getIntegerBitWidth() % 8 == 0 &&
4947 "Load must have type size equal to store size");
4948 assert(LI->getType()->getIntegerBitWidth() / 8 >= SliceSize &&
4949 "Load must be >= slice size");
4950
4951 uint64_t BaseOffset = Offsets.S->beginOffset();
4952 assert(BaseOffset + SliceSize > BaseOffset &&
4953 "Cannot represent alloca access size using 64-bit integers!");
4954
4954
4955 Instruction *BasePtr = cast<Instruction>(LI->getPointerOperand());
4956 IRB.SetInsertPoint(LI);
4957
4958 LLVM_DEBUG(dbgs() << " Splitting load: " << *LI << "\n");
4959
4960 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4961 int Idx = 0, Size = Offsets.Splits.size();
4962 for (;;) {
4963 auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4964 auto AS = LI->getPointerAddressSpace();
4965 auto *PartPtrTy = LI->getPointerOperandType();
4966 LoadInst *PLoad = IRB.CreateAlignedLoad(
4967 PartTy,
4968 getAdjustedPtr(IRB, DL, BasePtr,
4969 APInt(DL.getIndexSizeInBits(AS), PartOffset),
4970 PartPtrTy, BasePtr->getName() + "."),
4971 getAdjustedAlignment(LI, PartOffset),
4972 /*IsVolatile*/ false, LI->getName());
4973 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4974 LLVMContext::MD_access_group});
4975
4976 // Append this load onto the list of split loads so we can find it later
4977 // to rewrite the stores.
4978 SplitLoads.push_back(PLoad);
4979
4980 // Now build a new slice for the alloca.
4981 NewSlices.push_back(
4982 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4983 &PLoad->getOperandUse(PLoad->getPointerOperandIndex()),
4984 /*IsSplittable*/ false, nullptr));
4985 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
4986 << ", " << NewSlices.back().endOffset()
4987 << "): " << *PLoad << "\n");
4988
4989 // See if we've handled all the splits.
4990 if (Idx >= Size)
4991 break;
4992
4993 // Setup the next partition.
4994 PartOffset = Offsets.Splits[Idx];
4995 ++Idx;
4996 PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
4997 }
4998
4999 // Now that we have the split loads, do the slow walk over all uses of the
5000 // load and rewrite them as split stores, or save the split loads to use
5001 // below if the store is going to be split there anyway.
5002 bool DeferredStores = false;
5003 for (User *LU : LI->users()) {
5004 StoreInst *SI = cast<StoreInst>(LU);
5005 if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
5006 DeferredStores = true;
5007 LLVM_DEBUG(dbgs() << " Deferred splitting of store: " << *SI
5008 << "\n");
5009 continue;
5010 }
5011
5012 Value *StoreBasePtr = SI->getPointerOperand();
5013 IRB.SetInsertPoint(SI);
5014 AAMDNodes AATags = SI->getAAMetadata();
5015
5016 LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
5017
5018 for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
5019 LoadInst *PLoad = SplitLoads[Idx];
5020 uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
5021 auto *PartPtrTy = SI->getPointerOperandType();
5022
5023 auto AS = SI->getPointerAddressSpace();
5024 StoreInst *PStore = IRB.CreateAlignedStore(
5025 PLoad,
5026 getAdjustedPtr(IRB, DL, StoreBasePtr,
5027 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5028 PartPtrTy, StoreBasePtr->getName() + "."),
5029 getAdjustedAlignment(SI, PartOffset),
5030 /*IsVolatile*/ false);
5031 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5032 LLVMContext::MD_access_group,
5033 LLVMContext::MD_DIAssignID});
5034
5035 if (AATags)
5036 PStore->setAAMetadata(
5037 AATags.adjustForAccess(PartOffset, PLoad->getType(), DL));
5038 LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
5039 }
5040
5041 // We want to immediately iterate on any allocas impacted by splitting
5042 // this store, and we have to track any promotable alloca (indicated by
5043 // a direct store) as needing to be resplit because it is no longer
5044 // promotable.
5045 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
5046 ResplitPromotableAllocas.insert(OtherAI);
5047 Worklist.insert(OtherAI);
5048 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
5049 StoreBasePtr->stripInBoundsOffsets())) {
5050 Worklist.insert(OtherAI);
5051 }
5052
5053 // Mark the original store as dead.
5054 DeadInsts.push_back(SI);
5055 }
5056
5057 // Save the split loads if there are deferred stores among the users.
5058 if (DeferredStores)
5059 SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
5060
5061 // Mark the original load as dead and kill the original slice.
5062 DeadInsts.push_back(LI);
5063 Offsets.S->kill();
5064 }
5065
5066 // Second, we rewrite all of the split stores. At this point, we know that
5067 // all loads from this alloca have been split already. For stores of such
5068 // loads, we can simply look up the pre-existing split loads. For stores of
5069 // other loads, we split those loads first and then write split stores of
5070 // them.
5071 for (StoreInst *SI : Stores) {
5072 auto *LI = cast<LoadInst>(SI->getValueOperand());
5073 IntegerType *Ty = cast<IntegerType>(LI->getType());
5074 assert(Ty->getBitWidth() % 8 == 0);
5075 uint64_t StoreSize = Ty->getBitWidth() / 8;
5076 assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
5077
5078 auto &Offsets = SplitOffsetsMap[SI];
5079 assert(StoreSize == Offsets.S->endOffset() - Offsets.S->beginOffset() &&
5080 "Slice size should always match load size exactly!");
5081 uint64_t BaseOffset = Offsets.S->beginOffset();
5082 assert(BaseOffset + StoreSize > BaseOffset &&
5083 "Cannot represent alloca access size using 64-bit integers!");
5084
5085 Value *LoadBasePtr = LI->getPointerOperand();
5086 Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());
5087
5088 LLVM_DEBUG(dbgs() << " Splitting store: " << *SI << "\n");
5089
5090 // Check whether we have an already split load.
5091 auto SplitLoadsMapI = SplitLoadsMap.find(LI);
5092 std::vector<LoadInst *> *SplitLoads = nullptr;
5093 if (SplitLoadsMapI != SplitLoadsMap.end()) {
5094 SplitLoads = &SplitLoadsMapI->second;
5095 assert(SplitLoads->size() == Offsets.Splits.size() + 1 &&
5096 "Too few split loads for the number of splits in the store!");
5097 } else {
5098 LLVM_DEBUG(dbgs() << " of load: " << *LI << "\n");
5099 }
5100
5101 uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
5102 int Idx = 0, Size = Offsets.Splits.size();
5103 for (;;) {
5104 auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
5105 auto *LoadPartPtrTy = LI->getPointerOperandType();
5106 auto *StorePartPtrTy = SI->getPointerOperandType();
5107
5108 // Either lookup a split load or create one.
5109 LoadInst *PLoad;
5110 if (SplitLoads) {
5111 PLoad = (*SplitLoads)[Idx];
5112 } else {
5113 IRB.SetInsertPoint(LI);
5114 auto AS = LI->getPointerAddressSpace();
5115 PLoad = IRB.CreateAlignedLoad(
5116 PartTy,
5117 getAdjustedPtr(IRB, DL, LoadBasePtr,
5118 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5119 LoadPartPtrTy, LoadBasePtr->getName() + "."),
5120 getAdjustedAlignment(LI, PartOffset),
5121 /*IsVolatile*/ false, LI->getName());
5122 PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5123 LLVMContext::MD_access_group});
5124 }
5125
5126 // And store this partition.
5127 IRB.SetInsertPoint(SI);
5128 auto AS = SI->getPointerAddressSpace();
5129 StoreInst *PStore = IRB.CreateAlignedStore(
5130 PLoad,
5131 getAdjustedPtr(IRB, DL, StoreBasePtr,
5132 APInt(DL.getIndexSizeInBits(AS), PartOffset),
5133 StorePartPtrTy, StoreBasePtr->getName() + "."),
5134 getAdjustedAlignment(SI, PartOffset),
5135 /*IsVolatile*/ false);
5136 PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5137 LLVMContext::MD_access_group});
5138
5139 // Now build a new slice for the alloca.
5140 // ProtectedFieldDisc==nullptr is a lie, but it doesn't matter because we
5141 // already determined that all accesses are consistent.
5142 NewSlices.push_back(
5143 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5144 &PStore->getOperandUse(PStore->getPointerOperandIndex()),
5145 /*IsSplittable*/ false, nullptr));
5146 LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
5147 << ", " << NewSlices.back().endOffset()
5148 << "): " << *PStore << "\n");
5149 if (!SplitLoads) {
5150 LLVM_DEBUG(dbgs() << " of split load: " << *PLoad << "\n");
5151 }
5152
5153 // See if we've finished all the splits.
5154 if (Idx >= Size)
5155 break;
5156
5157 // Setup the next partition.
5158 PartOffset = Offsets.Splits[Idx];
5159 ++Idx;
5160 PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
5161 }
5162
5163 // We want to immediately iterate on any allocas impacted by splitting
5164 // this load, which is only relevant if it isn't a load of this alloca and
5165 // thus we didn't already split the loads above. We also have to keep track
5166 // of any promotable allocas we split loads on as they can no longer be
5167 // promoted.
5168 if (!SplitLoads) {
5169 if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
5170 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5171 ResplitPromotableAllocas.insert(OtherAI);
5172 Worklist.insert(OtherAI);
5173 } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
5174 LoadBasePtr->stripInBoundsOffsets())) {
5175 assert(OtherAI != &AI && "We can't re-split our own alloca!");
5176 Worklist.insert(OtherAI);
5177 }
5178 }
5179
5180 // Mark the original store as dead now that we've split it up and kill its
5181 // slice. Note that we leave the original load in place unless this store
5182 // was its only use. It may in turn be split up if it is an alloca load
5183 // for some other alloca, but it may be a normal load. This may introduce
5184 // redundant loads, but where those can be merged the rest of the optimizer
5185 // should handle the merging, and uncovering the SSA splits is more
5186 // important. In practice, the original loads will almost always be fully
5187 // split and removed eventually, and the splits will be merged by any
5188 // trivial CSE, including instcombine.
5189 if (LI->hasOneUse()) {
5190 assert(*LI->user_begin() == SI && "Single use isn't this store!");
5191 DeadInsts.push_back(LI);
5192 }
5193 DeadInsts.push_back(SI);
5194 Offsets.S->kill();
5195 }
5196
5197 // Remove the killed slices that have been pre-split.
5198 llvm::erase_if(AS, [](const Slice &S) { return S.isDead(); });
5199
5200 // Insert our new slices. This will sort and merge them into the sorted
5201 // sequence.
5202 AS.insert(NewSlices);
5203
5204 LLVM_DEBUG(dbgs() << " Pre-split slices:\n");
5205#ifndef NDEBUG
5206 for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
5207 LLVM_DEBUG(AS.print(dbgs(), I, " "));
5208#endif
5209
5210 // Finally, don't try to promote any allocas that now require re-splitting.
5211 // They have already been added to the worklist above.
5212 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5213
5214 return true;
5215}
5216
5217/// Select a partition type for an alloca partition.
5218///
5219/// Try to compute a friendly type for this partition of the alloca. This
5220/// won't always succeed, in which case we fall back to a legal integer type
5221/// or an i8 array of an appropriate size.
5222///
5223/// \returns A tuple with the following elements:
5224/// - PartitionType: The computed type for this partition.
5225/// - IsIntegerWideningViable: True if integer widening promotion is used.
5226/// - VectorType: The vector type if vector promotion is used, otherwise
5227/// nullptr.
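/// For example (illustrative), a 3-byte partition with no common use type and
/// no matching subtype typically ends up as [3 x i8], since i24 is not a
/// legal integer type on most targets.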
5228static std::tuple<Type *, bool, VectorType *>
5229 selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI,
5230 LLVMContext &C) {
5231 // First check if the partition is viable for vector promotion.
5232 //
5233 // We prefer vector promotion over integer widening promotion when:
5234 // - The vector element type is a floating-point type.
5235 // - All the loads/stores to the alloca are vector loads/stores to the
5236 // entire alloca or load/store a single element of the vector.
5237 //
5238 // Otherwise when there is an integer vector with mixed type loads/stores we
5239 // prefer integer widening promotion because it's more likely the user is
5240 // doing bitwise arithmetic and we generate better code.
5241 VectorType *VecTy =
5242 isVectorPromotionViable(P, DL);
5243 // If the vector element type is a floating-point type, we prefer vector
5244 // promotion. If the vector has one element, let the below code select
5245 // whether we promote with the vector or scalar.
5246 if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5247 VecTy->getElementCount().getFixedValue() > 1)
5248 return {VecTy, false, VecTy};
5249
5250 // Check if there is a common type that all slices of the partition use that
5251 // spans the partition.
5252 auto [CommonUseTy, LargestIntTy] =
5253 findCommonType(P.begin(), P.end(), P.endOffset());
5254 if (CommonUseTy) {
5255 TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy);
5256 if (CommonUseSize.isFixed() && CommonUseSize.getFixedValue() >= P.size()) {
5257 // We prefer vector promotion here because if vector promotion is viable
5258 // and there is a common type used, then it implies the second listed
5259 // condition for preferring vector promotion is true.
5260 if (VecTy)
5261 return {VecTy, false, VecTy};
5262 return {CommonUseTy, isIntegerWideningViable(P, CommonUseTy, DL),
5263 nullptr};
5264 }
5265 }
5266
5267 // Can we find an appropriate subtype in the original allocated
5268 // type?
5269 if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
5270 P.beginOffset(), P.size())) {
5271 // If the partition is an integer array that can be spanned by a legal
5272 // integer type, prefer to represent it as a legal integer type because
5273 // it's more likely to be promotable.
5274 if (TypePartitionTy->isArrayTy() &&
5275 TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5276 DL.isLegalInteger(P.size() * 8))
5277 TypePartitionTy = Type::getIntNTy(C, P.size() * 8);
5278 // There was no common type used, so we prefer integer widening promotion.
5279 if (isIntegerWideningViable(P, TypePartitionTy, DL))
5280 return {TypePartitionTy, true, nullptr};
5281 if (VecTy)
5282 return {VecTy, false, VecTy};
5283 // If we couldn't promote with TypePartitionTy, try with the largest
5284 // integer type used.
5285 if (LargestIntTy &&
5286 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size() &&
5287 isIntegerWideningViable(P, LargestIntTy, DL))
5288 return {LargestIntTy, true, nullptr};
5289
5290 // Fall back to TypePartitionTy; we probably won't promote.
5291 return {TypePartitionTy, false, nullptr};
5292 }
5293
5294 // Select the largest integer type used if it spans the partition.
5295 if (LargestIntTy &&
5296 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size())
5297 return {LargestIntTy, false, nullptr};
5298
5299 // Select a legal integer type if it spans the partition.
5300 if (DL.isLegalInteger(P.size() * 8))
5301 return {Type::getIntNTy(C, P.size() * 8), false, nullptr};
5302
5303 // Fall back to an i8 array.
5304 return {ArrayType::get(Type::getInt8Ty(C), P.size()), false, nullptr};
5305}
5306
5307/// Rewrite an alloca partition's users.
5308///
5309/// This routine drives both of the rewriting goals of the SROA pass. It tries
5310/// to rewrite uses of an alloca partition to be conducive for SSA value
5311/// promotion. If the partition needs a new, more refined alloca, this will
5312/// build that new alloca, preserving as much type information as possible, and
5313/// rewrite the uses of the old alloca to point at the new one and have the
5314/// appropriate new offsets. It also evaluates how successful the rewrite was
5315/// at enabling promotion and if it was successful queues the alloca to be
5316/// promoted.
5317std::pair<AllocaInst *, uint64_t>
5318SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P) {
5319 const DataLayout &DL = AI.getDataLayout();
5320 // Select the type for the new alloca that spans the partition.
5321 auto [PartitionTy, IsIntegerWideningViable, VecTy] =
5322 selectPartitionType(P, DL, AI, *C);
5323
5324 // Check for the case where we're going to rewrite to a new alloca of the
5325 // exact same type as the original, and with the same access offsets. In that
5326 // case, re-use the existing alloca, but still run through the rewriter to
5327 // perform phi and select speculation.
5328 // P.beginOffset() can be non-zero even with the same type in a case with
5329 // out-of-bounds access (e.g. @PR35657 function in SROA/basictest.ll).
5330 AllocaInst *NewAI;
5331 if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5332 NewAI = &AI;
5333 // FIXME: We should be able to bail at this point with "nothing changed".
5334 // FIXME: We might want to defer PHI speculation until after here.
5335 // FIXME: return nullptr;
5336 } else {
5337 // Make sure the alignment is compatible with P.beginOffset().
5338 const Align Alignment = commonAlignment(AI.getAlign(), P.beginOffset());
5339 // If we will get at least this much alignment from the type alone, leave
5340 // the alloca's alignment unconstrained.
5341 const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy);
5342 NewAI = new AllocaInst(
5343 PartitionTy, AI.getAddressSpace(), nullptr,
5344 IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment,
5345 AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
5346 AI.getIterator());
5347 // Copy the old AI debug location over to the new one.
5348 NewAI->setDebugLoc(AI.getDebugLoc());
5349 ++NumNewAllocas;
5350 }
5351
5352 LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
5353 << "," << P.endOffset() << ") to: " << *NewAI << "\n");
5354
5355 // Track the high watermark on the worklist as it is only relevant for
5356 // promoted allocas. We will reset it to this point if the alloca is not in
5357 // fact scheduled for promotion.
5358 unsigned PPWOldSize = PostPromotionWorklist.size();
5359 unsigned NumUses = 0;
5360 SmallSetVector<PHINode *, 8> PHIUsers;
5361 SmallSetVector<SelectInst *, 8> SelectUsers;
5362
5363 AllocaSliceRewriter Rewriter(
5364 DL, AS, *this, AI, *NewAI, PartitionTy, P.beginOffset(), P.endOffset(),
5365 IsIntegerWideningViable, VecTy, PHIUsers, SelectUsers);
5366 bool Promotable = true;
5367 // Check whether we can have tree-structured merge.
5368 if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
5369 NumUses += DeletedValues->size() + 1;
5370 for (Value *V : *DeletedValues)
5371 DeadInsts.push_back(V);
5372 } else {
5373 for (Slice *S : P.splitSliceTails()) {
5374 Promotable &= Rewriter.visit(S);
5375 ++NumUses;
5376 }
5377 for (Slice &S : P) {
5378 Promotable &= Rewriter.visit(&S);
5379 ++NumUses;
5380 }
5381 }
5382
5383 NumAllocaPartitionUses += NumUses;
5384 MaxUsesPerAllocaPartition.updateMax(NumUses);
5385
5386 // Now that we've processed all the slices in the new partition, check if any
5387 // PHIs or Selects would block promotion.
5388 for (PHINode *PHI : PHIUsers)
5389 if (!isSafePHIToSpeculate(*PHI)) {
5390 Promotable = false;
5391 PHIUsers.clear();
5392 SelectUsers.clear();
5393 break;
5394 }
5395
5396 SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
5397 NewSelectsToRewrite;
5398 NewSelectsToRewrite.reserve(SelectUsers.size());
5399 for (SelectInst *Sel : SelectUsers) {
5400 std::optional<RewriteableMemOps> Ops =
5401 isSafeSelectToSpeculate(*Sel, PreserveCFG);
5402 if (!Ops) {
5403 Promotable = false;
5404 PHIUsers.clear();
5405 SelectUsers.clear();
5406 NewSelectsToRewrite.clear();
5407 break;
5408 }
5409 NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));
5410 }
5411
5412 if (Promotable) {
5413 for (Use *U : AS.getDeadUsesIfPromotable()) {
5414 auto *OldInst = dyn_cast<Instruction>(U->get());
5415 Value::dropDroppableUse(*U);
5416 if (OldInst)
5417 if (isInstructionTriviallyDead(OldInst))
5418 DeadInsts.push_back(OldInst);
5419 }
5420 if (PHIUsers.empty() && SelectUsers.empty()) {
5421 // Promote the alloca.
5422 PromotableAllocas.insert(NewAI);
5423 } else {
5424 // If we have either PHIs or Selects to speculate, add them to those
5425 // worklists and re-queue the new alloca so that we promote it on the
5426 // next iteration.
5427 SpeculatablePHIs.insert_range(PHIUsers);
5428 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5429 NewSelectsToRewrite.size());
5430 for (auto &&KV : llvm::make_range(
5431 std::make_move_iterator(NewSelectsToRewrite.begin()),
5432 std::make_move_iterator(NewSelectsToRewrite.end())))
5433 SelectsToRewrite.insert(std::move(KV));
5434 Worklist.insert(NewAI);
5435 }
5436 } else {
5437 // Drop any post-promotion work items if promotion didn't happen.
5438 while (PostPromotionWorklist.size() > PPWOldSize)
5439 PostPromotionWorklist.pop_back();
5440
5441 // We couldn't promote and we didn't create a new partition, so nothing
5442 // happened.
5443 if (NewAI == &AI)
5444 return {nullptr, 0};
5445
5446 // If we can't promote the alloca, iterate on it to check for new
5447 // refinements exposed by splitting the current alloca. Don't iterate on an
5448 // alloca which didn't actually change and didn't get promoted.
5449 Worklist.insert(NewAI);
5450 }
5451
5452 return {NewAI, DL.getTypeSizeInBits(PartitionTy).getFixedValue()};
5453}
5454
5455// There isn't a shared interface to get the "address" parts out of a
5456// dbg.declare and dbg.assign, so provide some wrappers.
5457 static bool isKillAddress(const DbgVariableRecord *DVR) {
5458 if (DVR->isDbgAssign())
5459 return DVR->isKillAddress();
5460 return DVR->isKillLocation();
5461}
5462
5463 static const DIExpression *getAddressExpression(const DbgVariableRecord *DVR) {
5464 if (DVR->isDbgAssign())
5465 return DVR->getAddressExpression();
5466 return DVR->getExpression();
5467}
5468
5469/// Create or replace an existing fragment in a DIExpression with \p Frag.
5470/// If the expression already contains a DW_OP_LLVM_extract_bits_[sz]ext
5471/// operation, add \p BitExtractOffset to the offset part.
5472///
5473/// Returns the new expression, or nullptr if this fails (see details below).
5474///
5475/// This function is similar to DIExpression::createFragmentExpression except
5476/// for 3 important distinctions:
5477/// 1. The new fragment isn't relative to an existing fragment.
5478/// 2. It assumes the computed location is a memory location. This means we
5479/// don't need to perform checks that creating the fragment preserves the
5480/// expression semantics.
5481/// 3. Existing extract_bits are modified independently of fragment changes
5482/// using \p BitExtractOffset. A change to the fragment offset or size
5483/// may affect a bit extract. But a bit extract offset can change
5484/// independently of the fragment dimensions.
5485///
5486/// Returns the new expression, or nullptr if one couldn't be created.
5487/// Ideally this is only used to signal that a bit-extract has become
5488/// zero-sized (and thus the new debug record has no size and can be
5489/// dropped), however, it fails for other reasons too - see the FIXME below.
5490///
5491/// FIXME: To keep the change that introduces this function NFC it bails
5492 /// in some situations unnecessarily, e.g. when fragment and bit extract
5493/// sizes differ.
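/// For example (illustrative), adding a fragment of {OffsetInBits = 32,
/// SizeInBits = 32} to an otherwise empty expression produces
/// !DIExpression(DW_OP_LLVM_fragment, 32, 32); an existing
/// DW_OP_LLVM_extract_bits_[sz]ext op instead has \p BitExtractOffset folded
/// into its offset argument and no fragment is appended.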
5494 static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
5495 DIExpression::FragmentInfo Frag,
5496 int64_t BitExtractOffset) {
5497 SmallVector<uint64_t, 8> Ops;
5498 bool HasFragment = false;
5499 bool HasBitExtract = false;
5500
5501 for (auto &Op : Expr->expr_ops()) {
5502 if (Op.getOp() == dwarf::DW_OP_LLVM_fragment) {
5503 HasFragment = true;
5504 continue;
5505 }
5506 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5507 Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5508 HasBitExtract = true;
5509 int64_t ExtractOffsetInBits = Op.getArg(0);
5510 int64_t ExtractSizeInBits = Op.getArg(1);
5511
5512 // DIExpression::createFragmentExpression doesn't know how to handle
5513 // a fragment that is smaller than the extract. Copy the behaviour
5514 // (bail) to avoid non-NFC changes.
5515 // FIXME: Don't do this.
5516 if (Frag.SizeInBits < uint64_t(ExtractSizeInBits))
5517 return nullptr;
5518
5519 assert(BitExtractOffset <= 0);
5520 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5521
5522 // DIExpression::createFragmentExpression doesn't know what to do
5523 // if the new extract starts "outside" the existing one. Copy the
5524 // behaviour (bail) to avoid non-NFC changes.
5525 // FIXME: Don't do this.
5526 if (AdjustedOffset < 0)
5527 return nullptr;
5528
5529 Ops.push_back(Op.getOp());
5530 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5531 Ops.push_back(ExtractSizeInBits);
5532 continue;
5533 }
5534 Op.appendToVector(Ops);
5535 }
5536
5537 // Unsupported by createFragmentExpression, so don't support it here yet to
5538 // preserve NFC-ness.
5539 if (HasFragment && HasBitExtract)
5540 return nullptr;
5541
5542 if (!HasBitExtract) {
5543 Ops.push_back(dwarf::DW_OP_LLVM_fragment);
5544 Ops.push_back(Frag.OffsetInBits);
5545 Ops.push_back(Frag.SizeInBits);
5546 }
5547 return DIExpression::get(Expr->getContext(), Ops);
5548}
5549
5550/// Insert a new DbgRecord.
5551/// \p Orig Original to copy record type, debug loc and variable from, and
5552/// additionally value and value expression for dbg_assign records.
5553/// \p NewAddr Location's new base address.
5554/// \p NewAddrExpr New expression to apply to address.
5555/// \p BeforeInst Insert position.
5556/// \p NewFragment New fragment (absolute, non-relative).
5557/// \p BitExtractAdjustment Offset to apply to any extract_bits op.
5558static void
5559 insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr,
5560 DIExpression *NewAddrExpr, Instruction *BeforeInst,
5561 std::optional<DIExpression::FragmentInfo> NewFragment,
5562 int64_t BitExtractAdjustment) {
5563 (void)DIB;
5564
5565 // A dbg_assign puts fragment info in the value expression only. The address
5566 // expression has already been built: NewAddrExpr. A dbg_declare puts the
5567 // new fragment info into NewAddrExpr (as it only has one expression).
5568 DIExpression *NewFragmentExpr =
5569 Orig->isDbgAssign() ? Orig->getExpression() : NewAddrExpr;
5570 if (NewFragment)
5571 NewFragmentExpr = createOrReplaceFragment(NewFragmentExpr, *NewFragment,
5572 BitExtractAdjustment);
5573 if (!NewFragmentExpr)
5574 return;
5575
5576 if (Orig->isDbgDeclare()) {
5577 DbgVariableRecord *DVR = DbgVariableRecord::createDVRDeclare(
5578 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5579 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5580 BeforeInst->getIterator());
5581 return;
5582 }
5583
5584 if (Orig->isDbgValue()) {
5585 DbgVariableRecord *DVR = DbgVariableRecord::createDbgVariableRecord(
5586 NewAddr, Orig->getVariable(), NewFragmentExpr, Orig->getDebugLoc());
5587 // Drop debug information if the expression doesn't start with a
5588 // DW_OP_deref. This is because without a DW_OP_deref, the #dbg_value
5589 // describes the address of the alloca rather than the value inside it.
5590 if (!NewFragmentExpr->startsWithDeref())
5591 DVR->setKillAddress();
5592 BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5593 BeforeInst->getIterator());
5594 return;
5595 }
5596
5597 // Apply a DIAssignID to the store if it doesn't already have it.
5598 if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5599 NewAddr->setMetadata(LLVMContext::MD_DIAssignID,
5600 DIAssignID::getDistinct(NewAddr->getContext()));
5601 }
5602
5603 DbgVariableRecord *NewAssign = DbgVariableRecord::createLinkedDVRAssign(
5604 NewAddr, Orig->getValue(), Orig->getVariable(), NewFragmentExpr, NewAddr,
5605 NewAddrExpr, Orig->getDebugLoc());
5606 LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5607 (void)NewAssign;
5608}
5609
5610/// Walks the slices of an alloca and form partitions based on them,
5611/// rewriting each of their uses.
5612bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5613 if (AS.begin() == AS.end())
5614 return false;
5615
5616 unsigned NumPartitions = 0;
5617 bool Changed = false;
5618 const DataLayout &DL = AI.getModule()->getDataLayout();
5619
5620 // First try to pre-split loads and stores.
5621 Changed |= presplitLoadsAndStores(AI, AS);
5622
5623 // Now that we have identified any pre-splitting opportunities,
5624 // mark loads and stores unsplittable except for the following case.
5625 // We leave a slice splittable if all other slices are disjoint or fully
5626 // included in the slice, such as whole-alloca loads and stores.
5627 // If we fail to split these during pre-splitting, we want to force them
5628 // to be rewritten into a partition.
5629 bool IsSorted = true;
5630
5631 uint64_t AllocaSize = AI.getAllocationSize(DL)->getFixedValue();
5632 const uint64_t MaxBitVectorSize = 1024;
5633 if (AllocaSize <= MaxBitVectorSize) {
5634 // If a byte boundary is included in any load or store, a slice starting or
5635 // ending at the boundary is not splittable.
5636 SmallBitVector SplittableOffset(AllocaSize + 1, true);
5637 for (Slice &S : AS)
5638 for (unsigned O = S.beginOffset() + 1;
5639 O < S.endOffset() && O < AllocaSize; O++)
5640 SplittableOffset.reset(O);
5641
5642 for (Slice &S : AS) {
5643 if (!S.isSplittable())
5644 continue;
5645
5646 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5647 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5648 continue;
5649
5650 if (isa<LoadInst>(S.getUse()->getUser()) ||
5651 isa<StoreInst>(S.getUse()->getUser())) {
5652 S.makeUnsplittable();
5653 IsSorted = false;
5654 }
5655 }
5656 } else {
5657 // We only allow whole-alloca splittable loads and stores
5658 // for a large alloca to avoid creating too large a BitVector.
5659 for (Slice &S : AS) {
5660 if (!S.isSplittable())
5661 continue;
5662
5663 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5664 continue;
5665
5666 if (isa<LoadInst>(S.getUse()->getUser()) ||
5667 isa<StoreInst>(S.getUse()->getUser())) {
5668 S.makeUnsplittable();
5669 IsSorted = false;
5670 }
5671 }
5672 }
5673
5674 if (!IsSorted)
5675 llvm::stable_sort(AS);
5676
5677 /// Describes the allocas introduced by rewritePartition in order to migrate
5678 /// the debug info.
5679 struct Fragment {
5680 AllocaInst *Alloca;
5681 uint64_t Offset;
5682 uint64_t Size;
5683 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5684 : Alloca(AI), Offset(O), Size(S) {}
5685 };
5686 SmallVector<Fragment, 4> Fragments;
5687
5688 // Rewrite each partition.
5689 for (auto &P : AS.partitions()) {
5690 auto [NewAI, ActiveBits] = rewritePartition(AI, AS, P);
5691 if (NewAI) {
5692 Changed = true;
5693 if (NewAI != &AI) {
5694 uint64_t SizeOfByte = 8;
5695 // Don't include any padding.
5696 uint64_t Size = std::min(ActiveBits, P.size() * SizeOfByte);
5697 Fragments.push_back(
5698 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5699 }
5700 }
5701 ++NumPartitions;
5702 }
5703
5704 NumAllocaPartitions += NumPartitions;
5705 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5706
5707 // Migrate debug information from the old alloca to the new alloca(s)
5708 // and the individual partitions.
5709 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5710 // Can't overlap with undef memory.
5711 if (isKillAddress(DbgVariable))
5712 return;
5713
5714 const Value *DbgPtr = DbgVariable->getAddress();
5715 DIExpression::FragmentInfo VarFrag =
5716 DbgVariable->getFragmentOrEntireVariable();
5717 // Get the address expression constant offset if one exists and the ops
5718 // that come after it.
5719 int64_t CurrentExprOffsetInBytes = 0;
5720 SmallVector<uint64_t> PostOffsetOps;
5721 if (!getAddressExpression(DbgVariable)
5722 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5723 return; // Couldn't interpret this DIExpression - drop the var.
5724
5725 // Offset defined by a DW_OP_LLVM_extract_bits_[sz]ext.
5726 int64_t ExtractOffsetInBits = 0;
5727 for (auto Op : getAddressExpression(DbgVariable)->expr_ops()) {
5728 if (Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_zext ||
5729 Op.getOp() == dwarf::DW_OP_LLVM_extract_bits_sext) {
5730 ExtractOffsetInBits = Op.getArg(0);
5731 break;
5732 }
5733 }
5734
5735 DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
5736 for (auto Fragment : Fragments) {
5737 int64_t OffsetFromLocationInBits;
5738 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5739 // Find the variable fragment that the new alloca slice covers.
5740 // Drop debug info for this variable fragment if we can't compute an
5741 // intersect between it and the alloca slice.
5742 if (!DIExpression::calculateFragmentIntersect(
5743 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5744 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5745 NewDbgFragment, OffsetFromLocationInBits))
5746 continue; // Do not migrate this fragment to this slice.
5747
5748 // A zero-sized fragment indicates there's no intersection between the
5749 // variable fragment and the alloca slice. Skip this slice for this
5750 // variable fragment.
5751 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5752 continue; // Do not migrate this fragment to this slice.
5753
5754 // No fragment indicates DbgVariable's variable or fragment exactly
5755 // overlaps the slice; copy its fragment (or nullopt if there isn't one).
5756 if (!NewDbgFragment)
5757 NewDbgFragment = DbgVariable->getFragment();
5758
5759 // Reduce the new expression offset by the bit-extract offset since
5760 // we'll be keeping that.
5761 int64_t OffestFromNewAllocaInBits =
5762 OffsetFromLocationInBits - ExtractOffsetInBits;
5763 // We need to adjust an existing bit extract if the offset expression
5764 // can't eat the slack (i.e., if the new offset would be negative).
5765 int64_t BitExtractOffset =
5766 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5767 // The magnitude of a negative value indicates the number of bits into
5768 // the existing variable fragment that the memory region begins. The new
5769 // variable fragment already excludes those bits - the new DbgPtr offset
5770 // only needs to be applied if it's positive.
5771 OffestFromNewAllocaInBits =
5772 std::max(int64_t(0), OffestFromNewAllocaInBits);
5773
5774 // Rebuild the expression:
5775 // {Offset(OffestFromNewAllocaInBits), PostOffsetOps, NewDbgFragment}
5776 // Add NewDbgFragment later, because dbg.assigns don't want it in the
5777 // address expression but the value expression instead.
5778 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5779 if (OffestFromNewAllocaInBits > 0) {
5780 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5781 NewExpr = DIExpression::prepend(NewExpr, /*flags=*/0, OffsetInBytes);
5782 }
5783
5784 // Remove any existing intrinsics on the new alloca describing
5785 // the variable fragment.
5786 auto RemoveOne = [DbgVariable](auto *OldDII) {
5787 auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5788 return LHS->getVariable() == RHS->getVariable() &&
5789 LHS->getDebugLoc()->getInlinedAt() ==
5790 RHS->getDebugLoc()->getInlinedAt();
5791 };
5792 if (SameVariableFragment(OldDII, DbgVariable))
5793 OldDII->eraseFromParent();
5794 };
5795 for_each(findDVRDeclares(Fragment.Alloca), RemoveOne);
5796 for_each(findDVRValues(Fragment.Alloca), RemoveOne);
5797 insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
5798 NewDbgFragment, BitExtractOffset);
5799 }
5800 };
5801
5802 // Migrate debug information from the old alloca to the new alloca(s)
5803 // and the individual partitions.
5804 for_each(findDVRDeclares(&AI), MigrateOne);
5805 for_each(findDVRValues(&AI), MigrateOne);
5806 for_each(at::getDVRAssignmentMarkers(&AI), MigrateOne);
5807
5808 return Changed;
5809}
5810
5811/// Clobber a use with poison, deleting the used value if it becomes dead.
5812void SROA::clobberUse(Use &U) {
5813 Value *OldV = U;
5814 // Replace the use with a poison value.
5815 U = PoisonValue::get(OldV->getType());
5816
5817 // Check for this making an instruction dead. We have to garbage collect
5818 // all the dead instructions to ensure the uses of any alloca end up being
5819 // minimal.
5820 if (Instruction *OldI = dyn_cast<Instruction>(OldV))
5821 if (isInstructionTriviallyDead(OldI)) {
5822 DeadInsts.push_back(OldI);
5823 }
5824}
5825
5826/// A basic LoadAndStorePromoter that does not remove store nodes.
5827 class BasicLoadAndStorePromoter : public LoadAndStorePromoter {
5828 public:
5829 BasicLoadAndStorePromoter(ArrayRef<const Instruction *> Insts, SSAUpdater &S,
5830 Type *ZeroType)
5831 : LoadAndStorePromoter(Insts, S), ZeroType(ZeroType) {}
5832 bool shouldDelete(Instruction *I) const override {
5833 return !isa<StoreInst>(I) && !isa<AllocaInst>(I);
5834 }
5835
5837 return UndefValue::get(ZeroType);
5838 }
5839
5840private:
5841 Type *ZeroType;
5842};
5843
5844bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5845 // Look through each "partition", looking for slices with the same start/end
5846 // that do not overlap with any before them. The slices are sorted by
5847 // increasing beginOffset. We don't use AS.partitions(), as it will use a more
5848 // sophisticated algorithm that takes splittable slices into account.
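  // For example (illustrative), if %a escapes read-only and every access is
  //   store i32 %x, ptr %a
  //   %v = load i32, ptr %a
  // at the same offset and type, the load is rewritten to use %x directly
  // while the store is left in place for the external readers.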
5849 LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5850 bool AllSameAndValid = true;
5851 Type *PartitionType = nullptr;
5852 SmallVector<Instruction *, 4> Insts;
5853 uint64_t BeginOffset = 0;
5854 uint64_t EndOffset = 0;
5855
5856 auto Flush = [&]() {
5857 if (AllSameAndValid && !Insts.empty()) {
5858 LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5859 << EndOffset << ")\n");
5860 SmallVector<PHINode *, 4> NewPHIs;
5861 SSAUpdater SSA(&NewPHIs);
5862 Insts.push_back(&AI);
5863 BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5864 Promoter.run(Insts);
5865 }
5866 AllSameAndValid = true;
5867 PartitionType = nullptr;
5868 Insts.clear();
5869 };
5870
5871 for (Slice &S : AS) {
5872 auto *User = cast<Instruction>(S.getUse()->getUser());
5873 if (isAssumeLikeIntrinsic(User)) {
5874 LLVM_DEBUG({
5875 dbgs() << "Ignoring slice: ";
5876 AS.print(dbgs(), &S);
5877 });
5878 continue;
5879 }
5880 if (S.beginOffset() >= EndOffset) {
5881 Flush();
5882 BeginOffset = S.beginOffset();
5883 EndOffset = S.endOffset();
5884 } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5885 if (AllSameAndValid) {
5886 LLVM_DEBUG({
5887 dbgs() << "Slice does not match range [" << BeginOffset << ", "
5888 << EndOffset << ")";
5889 AS.print(dbgs(), &S);
5890 });
5891 AllSameAndValid = false;
5892 }
5893 EndOffset = std::max(EndOffset, S.endOffset());
5894 continue;
5895 }
5896
5897 if (auto *LI = dyn_cast<LoadInst>(User)) {
5898 Type *UserTy = LI->getType();
5899 // LoadAndStorePromoter requires all the types to be the same.
5900 if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5901 AllSameAndValid = false;
5902 PartitionType = UserTy;
5903 Insts.push_back(User);
5904 } else if (auto *SI = dyn_cast<StoreInst>(User)) {
5905 Type *UserTy = SI->getValueOperand()->getType();
5906 if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5907 AllSameAndValid = false;
5908 PartitionType = UserTy;
5909 Insts.push_back(User);
5910 } else {
5911 AllSameAndValid = false;
5912 }
5913 }
5914
5915 Flush();
5916 return true;
5917}
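// Worked example (hypothetical IR, for exposition only): for an alloca that
// escapes read-only, loads can still be forwarded from the dominating stores
// while the alloca and the stores are kept for the escaping reader.
//
//   %a = alloca i32
//   store i32 %v, ptr %a
//   call void @reader(ptr readonly captures(none) %a)
//   %l = load i32, ptr %a      ; %l is rewritten to use %v directly
//
// BasicLoadAndStorePromoter::shouldDelete() is what keeps the store and the
// alloca in place here; only the loads are folded away.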
5918
5919/// Analyze an alloca for SROA.
5920///
5921/// This analyzes the alloca to ensure we can reason about it, builds
5922/// the slices of the alloca, and then hands it off to be split and
5923/// rewritten as needed.
5924std::pair<bool /*Changed*/, bool /*CFGChanged*/>
5925SROA::runOnAlloca(AllocaInst &AI) {
5926 bool Changed = false;
5927 bool CFGChanged = false;
5928
5929 LLVM_DEBUG(dbgs() << "SROA alloca: " << AI << "\n");
5930 ++NumAllocasAnalyzed;
5931
5932 // Special case dead allocas, as they're trivial.
5933 if (AI.use_empty()) {
5934 AI.eraseFromParent();
5935 Changed = true;
5936 return {Changed, CFGChanged};
5937 }
5938 const DataLayout &DL = AI.getDataLayout();
5939
5940 // Skip alloca forms that this analysis can't handle.
5941 std::optional<TypeSize> Size = AI.getAllocationSize(DL);
5942 if (AI.isArrayAllocation() || !Size || Size->isScalable() || Size->isZero())
5943 return {Changed, CFGChanged};
5944
5945 // First, split any FCA loads and stores touching this alloca to promote
5946 // better splitting and promotion opportunities.
5947 IRBuilderTy IRB(&AI);
5948 AggLoadStoreRewriter AggRewriter(DL, IRB);
5949 Changed |= AggRewriter.rewrite(AI);
5950
5951 // Build the slices using a recursive instruction-visiting builder.
5952 AllocaSlices AS(DL, AI);
5953 LLVM_DEBUG(AS.print(dbgs()));
5954 if (AS.isEscaped())
5955 return {Changed, CFGChanged};
5956
5957 if (AS.isEscapedReadOnly()) {
5958 Changed |= propagateStoredValuesToLoads(AI, AS);
5959 return {Changed, CFGChanged};
5960 }
5961
5962 for (auto &P : AS.partitions()) {
5963    // For now, we can't split if a field is accessed both as a protected
5964    // field and as an unprotected one, because that would mean we would need
5965    // to introduce sign and auth operations to convert between the protected
5966    // and non-protected uses, and this pass doesn't know how to do that.
5967    // Also, this case is unlikely to occur in normal code.
5968 std::optional<Value *> ProtectedFieldDisc;
5969 auto SliceHasMismatch = [&](Slice &S) {
5970 if (auto *II = dyn_cast<IntrinsicInst>(S.getUse()->getUser()))
5971 if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
5972 II->getIntrinsicID() == Intrinsic::lifetime_end)
5973 return false;
5974 if (!ProtectedFieldDisc)
5975 ProtectedFieldDisc = S.ProtectedFieldDisc;
5976 return *ProtectedFieldDisc != S.ProtectedFieldDisc;
5977 };
5978 for (Slice &S : P)
5979 if (SliceHasMismatch(S))
5980 return {Changed, CFGChanged};
5981 for (Slice *S : P.splitSliceTails())
5982 if (SliceHasMismatch(*S))
5983 return {Changed, CFGChanged};
5984 }
5985
5986 // Delete all the dead users of this alloca before splitting and rewriting it.
5987 for (Instruction *DeadUser : AS.getDeadUsers()) {
5988 // Free up everything used by this instruction.
5989 for (Use &DeadOp : DeadUser->operands())
5990 clobberUse(DeadOp);
5991
5992 // Now replace the uses of this instruction.
5993 DeadUser->replaceAllUsesWith(PoisonValue::get(DeadUser->getType()));
5994
5995 // And mark it for deletion.
5996 DeadInsts.push_back(DeadUser);
5997 Changed = true;
5998 }
5999 for (Use *DeadOp : AS.getDeadOperands()) {
6000 clobberUse(*DeadOp);
6001 Changed = true;
6002 }
6003 for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {
6004 PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));
6005
6006 DeadInsts.push_back(PFPUser);
6007 Changed = true;
6008 }
6009
6010 // No slices to split. Leave the dead alloca for a later pass to clean up.
6011 if (AS.begin() == AS.end())
6012 return {Changed, CFGChanged};
6013
6014 Changed |= splitAlloca(AI, AS);
6015
6016 LLVM_DEBUG(dbgs() << " Speculating PHIs\n");
6017 while (!SpeculatablePHIs.empty())
6018 speculatePHINodeLoads(IRB, *SpeculatablePHIs.pop_back_val());
6019
6020 LLVM_DEBUG(dbgs() << " Rewriting Selects\n");
6021 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
6022 while (!RemainingSelectsToRewrite.empty()) {
6023 const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
6024 CFGChanged |=
6025 rewriteSelectInstMemOps(*K, V, IRB, PreserveCFG ? nullptr : DTU);
6026 }
6027
6028 return {Changed, CFGChanged};
6029}
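// Worked example (hypothetical IR, for exposition only): a typical alloca this
// routine fully scalarizes. AggLoadStoreRewriter first splits the aggregate
// load into per-field loads, the two i32 slices then form separate partitions,
// and mem2reg later promotes them so no alloca remains:
//
//   %pair = alloca { i32, i32 }
//   %f0 = getelementptr inbounds { i32, i32 }, ptr %pair, i32 0, i32 0
//   store i32 %x, ptr %f0
//   %f1 = getelementptr inbounds { i32, i32 }, ptr %pair, i32 0, i32 1
//   store i32 %y, ptr %f1
//   %v = load { i32, i32 }, ptr %pair
//
// After SROA, %v is rebuilt from %x and %y with insertvalue instructions.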
6030
6031/// Delete the dead instructions accumulated in this run.
6032///
6033/// Recursively deletes the dead instructions we've accumulated. This is done
6034/// at the very end to maximize locality of the recursive delete and to
6035/// minimize the problems of invalidated instruction pointers as such pointers
6036/// are used heavily in the intermediate stages of the algorithm.
6037///
6038/// We also record the alloca instructions deleted here so that they aren't
6039/// subsequently handed to mem2reg to promote.
6040bool SROA::deleteDeadInstructions(
6041 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
6042 bool Changed = false;
6043 while (!DeadInsts.empty()) {
6044 Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
6045 if (!I)
6046 continue;
6047 LLVM_DEBUG(dbgs() << "Deleting dead instruction: " << *I << "\n");
6048
6049 // If the instruction is an alloca, find the possible dbg.declare connected
6050 // to it, and remove it too. We must do this before calling RAUW or we will
6051 // not be able to find it.
6052 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6053 DeletedAllocas.insert(AI);
6054 for (DbgVariableRecord *OldDII : findDVRDeclares(AI))
6055 OldDII->eraseFromParent();
6056 }
6057
6058    at::deleteAssignmentMarkers(I);
6059    I->replaceAllUsesWith(UndefValue::get(I->getType()));
6060
6061 for (Use &Operand : I->operands())
6062 if (Instruction *U = dyn_cast<Instruction>(Operand)) {
6063 // Zero out the operand and see if it becomes trivially dead.
6064 Operand = nullptr;
6065        if (isInstructionTriviallyDead(U))
6066          DeadInsts.push_back(U);
6067 }
6068
6069 ++NumDeleted;
6070 I->eraseFromParent();
6071 Changed = true;
6072 }
6073 return Changed;
6074}
6075/// Promote the allocas, using the best available technique.
6076///
6077/// This attempts to promote whatever allocas have been identified as viable in
6078/// the PromotableAllocas list. If that list is empty, there is nothing to do.
6079/// This function returns whether any promotion occurred.
6080bool SROA::promoteAllocas() {
6081 if (PromotableAllocas.empty())
6082 return false;
6083
6084 if (SROASkipMem2Reg) {
6085 LLVM_DEBUG(dbgs() << "Not promoting allocas with mem2reg!\n");
6086 } else {
6087 LLVM_DEBUG(dbgs() << "Promoting allocas with mem2reg...\n");
6088 NumPromoted += PromotableAllocas.size();
6089 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
6090 }
6091
6092 PromotableAllocas.clear();
6093 return true;
6094}
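// Usage note (illustrative): the hidden SROASkipMem2Reg flag checked above can
// be used to inspect SROA's rewriting without the final promotion step,
// leaving the smaller allocas in place, e.g.:
//
//   opt -passes=sroa -sroa-skip-mem2reg -S input.ll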
6095
6096std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
6097 LLVM_DEBUG(dbgs() << "SROA function: " << F.getName() << "\n");
6098
6099 const DataLayout &DL = F.getDataLayout();
6100 BasicBlock &EntryBB = F.getEntryBlock();
6101 for (BasicBlock::iterator I = EntryBB.begin(), E = std::prev(EntryBB.end());
6102 I != E; ++I) {
6103 if (AllocaInst *AI = dyn_cast<AllocaInst>(I)) {
6104 std::optional<TypeSize> Size = AI->getAllocationSize(DL);
6105 if (Size && Size->isScalable() && isAllocaPromotable(AI))
6106 PromotableAllocas.insert(AI);
6107 else
6108 Worklist.insert(AI);
6109 }
6110 }
6111
6112 bool Changed = false;
6113 bool CFGChanged = false;
6114 // A set of deleted alloca instruction pointers which should be removed from
6115 // the list of promotable allocas.
6116 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6117
6118 do {
6119 while (!Worklist.empty()) {
6120 auto [IterationChanged, IterationCFGChanged] =
6121 runOnAlloca(*Worklist.pop_back_val());
6122 Changed |= IterationChanged;
6123 CFGChanged |= IterationCFGChanged;
6124
6125 Changed |= deleteDeadInstructions(DeletedAllocas);
6126
6127 // Remove the deleted allocas from various lists so that we don't try to
6128 // continue processing them.
6129 if (!DeletedAllocas.empty()) {
6130 Worklist.set_subtract(DeletedAllocas);
6131 PostPromotionWorklist.set_subtract(DeletedAllocas);
6132 PromotableAllocas.set_subtract(DeletedAllocas);
6133 DeletedAllocas.clear();
6134 }
6135 }
6136
6137 Changed |= promoteAllocas();
6138
6139 Worklist = PostPromotionWorklist;
6140 PostPromotionWorklist.clear();
6141 } while (!Worklist.empty());
6142
6143 assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6144 assert((!CFGChanged || !PreserveCFG) &&
6145 "Should not have modified the CFG when told to preserve it.");
6146
6147 if (Changed && isAssignmentTrackingEnabled(*F.getParent())) {
6148 for (auto &BB : F) {
6149      RemoveRedundantDbgInstrs(&BB);
6150    }
6151 }
6152
6153 return {Changed, CFGChanged};
6154}
6155
6156PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
6157  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
6158  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
6159  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6160 auto [Changed, CFGChanged] =
6161 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6162 if (!Changed)
6163 return PreservedAnalyses::all();
6164  PreservedAnalyses PA;
6165  if (!CFGChanged)
6166    PA.preserveSet<CFGAnalyses>();
6167  PA.preserve<DominatorTreeAnalysis>();
6168  return PA;
6169}
6170
6171void SROAPass::printPipeline(
6172    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
6173 static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
6174 OS, MapClassName2PassName);
6175 OS << (PreserveCFG == SROAOptions::PreserveCFG ? "<preserve-cfg>"
6176 : "<modify-cfg>");
6177}
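// Usage note (illustrative): these are the parameter spellings accepted by the
// textual pass pipeline, e.g.:
//
//   opt -passes='sroa<preserve-cfg>' -S input.ll
//   opt -passes='sroa<modify-cfg>'   -S input.ll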
6178
6179SROAPass::SROAPass(SROAOptions PreserveCFG) : PreserveCFG(PreserveCFG) {}
6180
6181namespace {
6182
6183/// A legacy pass for the legacy pass manager that wraps the \c SROA pass.
6184class SROALegacyPass : public FunctionPass {
6185  SROAOptions PreserveCFG;
6186
6187public:
6188 static char ID;
6189
6190  SROALegacyPass(SROAOptions PreserveCFG = SROAOptions::PreserveCFG)
6191      : FunctionPass(ID), PreserveCFG(PreserveCFG) {
6192    initializeSROALegacyPassPass(*PassRegistry::getPassRegistry());
6193  }
6194
6195 bool runOnFunction(Function &F) override {
6196 if (skipFunction(F))
6197 return false;
6198
6199 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6200 AssumptionCache &AC =
6201 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6202 DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
6203 auto [Changed, _] =
6204 SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6205 return Changed;
6206 }
6207
6208 void getAnalysisUsage(AnalysisUsage &AU) const override {
6209 AU.addRequired<AssumptionCacheTracker>();
6210 AU.addRequired<DominatorTreeWrapperPass>();
6211 AU.addPreserved<GlobalsAAWrapperPass>();
6212 AU.addPreserved<DominatorTreeWrapperPass>();
6213 }
6214
6215 StringRef getPassName() const override { return "SROA"; }
6216};
6217
6218} // end anonymous namespace
6219
6220char SROALegacyPass::ID = 0;
6221
6222FunctionPass *llvm::createSROAPass(bool PreserveCFG) {
6223  return new SROALegacyPass(PreserveCFG ? SROAOptions::PreserveCFG
6224                                        : SROAOptions::ModifyCFG);
6225}
6226
6227INITIALIZE_PASS_BEGIN(SROALegacyPass, "sroa",
6228                      "Scalar Replacement Of Aggregates", false, false)
6229INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
6230INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
6231INITIALIZE_PASS_END(SROALegacyPass, "sroa", "Scalar Replacement Of Aggregates",
6232                    false, false)