47#include "llvm/Config/llvm-config.h"
103#define DEBUG_TYPE "sroa"
105STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
106STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
107STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
108STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
109STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
110STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
111STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
112STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
114 "Number of loads rewritten into predicated loads to allow promotion");
117 "Number of stores rewritten into predicated loads to allow promotion");
119STATISTIC(NumVectorized,
"Number of vectorized aggregates");
130class AllocaSliceRewriter;
134class SelectHandSpeculativity {
135 unsigned char Storage = 0;
139 SelectHandSpeculativity() =
default;
140 SelectHandSpeculativity &setAsSpeculatable(
bool isTrueVal);
141 bool isSpeculatable(
bool isTrueVal)
const;
142 bool areAllSpeculatable()
const;
143 bool areAnySpeculatable()
const;
144 bool areNoneSpeculatable()
const;
146 explicit operator intptr_t()
const {
return static_cast<intptr_t
>(Storage); }
147 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
149static_assert(
sizeof(SelectHandSpeculativity) ==
sizeof(
unsigned char));
151using PossiblySpeculatableLoad =
154using RewriteableMemOp =
155 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
177 LLVMContext *
const C;
178 DomTreeUpdater *
const DTU;
179 AssumptionCache *
const AC;
180 const bool PreserveCFG;
189 SmallSetVector<AllocaInst *, 16> Worklist;
204 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
207 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
208 SmallPtrSet<AllocaInst *, 16>, 16>
216 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
220 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
238 static std::optional<RewriteableMemOps>
239 isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG);
242 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
244 : C(C), DTU(DTU), AC(AC),
245 PreserveCFG(PreserveCFG_ ==
SROAOptions::PreserveCFG) {}
248 std::pair<
bool ,
bool > runSROA(Function &
F);
251 friend class AllocaSliceRewriter;
253 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
254 std::pair<AllocaInst *, uint64_t>
255 rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &
P);
256 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
257 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
258 std::pair<
bool ,
bool > runOnAlloca(AllocaInst &AI);
259 void clobberUse(Use &U);
260 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
261 bool promoteAllocas();
/// Tri-state outcome of the debug-info fragment computation that follows:
/// use the computed fragment, emit with no fragment, or skip the record.
/// (Exact semantics live at the use sites.)
enum FragCalcResult { UseFrag, UseNoFrag, Skip };
279 uint64_t NewStorageSliceOffsetInBits,
281 std::optional<DIExpression::FragmentInfo> StorageFragment,
282 std::optional<DIExpression::FragmentInfo> CurrentFragment,
286 if (StorageFragment) {
288 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
290 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
292 Target.SizeInBits = NewStorageSliceSizeInBits;
293 Target.OffsetInBits = NewStorageSliceOffsetInBits;
299 if (!CurrentFragment) {
300 if (
auto Size = Variable->getSizeInBits()) {
303 if (
Target == CurrentFragment)
310 if (!CurrentFragment || *CurrentFragment ==
Target)
316 if (
Target.startInBits() < CurrentFragment->startInBits() ||
317 Target.endInBits() > CurrentFragment->endInBits())
356 if (DVRAssignMarkerRange.empty())
362 LLVM_DEBUG(
dbgs() <<
" OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
364 LLVM_DEBUG(
dbgs() <<
" SliceSizeInBits: " << SliceSizeInBits <<
"\n");
376 DVR->getExpression()->getFragmentInfo();
389 auto *Expr = DbgAssign->getExpression();
390 bool SetKillLocation =
false;
393 std::optional<DIExpression::FragmentInfo> BaseFragment;
396 if (R == BaseFragments.
end())
398 BaseFragment = R->second;
400 std::optional<DIExpression::FragmentInfo> CurrentFragment =
401 Expr->getFragmentInfo();
404 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
405 BaseFragment, CurrentFragment, NewFragment);
409 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
410 if (CurrentFragment) {
415 NewFragment.
OffsetInBits -= CurrentFragment->OffsetInBits;
428 SetKillLocation =
true;
436 Inst->
setMetadata(LLVMContext::MD_DIAssignID, NewID);
445 DbgAssign->getDebugLoc())));
448 NewAssign = DbgAssign;
467 Value && (DbgAssign->hasArgList() ||
468 !DbgAssign->getExpression()->isSingleLocationExpression());
485 if (NewAssign != DbgAssign) {
486 NewAssign->
moveBefore(DbgAssign->getIterator());
489 LLVM_DEBUG(
dbgs() <<
"Created new assign: " << *NewAssign <<
"\n");
492 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
502 Twine getNameWithPrefix(
const Twine &Name)
const {
507 void SetNamePrefix(
const Twine &
P) { Prefix =
P.str(); }
509 void InsertHelper(Instruction *
I,
const Twine &Name,
527 uint64_t BeginOffset = 0;
530 uint64_t EndOffset = 0;
534 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
539 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
bool IsSplittable)
540 : BeginOffset(BeginOffset), EndOffset(EndOffset),
541 UseAndIsSplittable(
U, IsSplittable) {}
543 uint64_t beginOffset()
const {
return BeginOffset; }
544 uint64_t endOffset()
const {
return EndOffset; }
546 bool isSplittable()
const {
return UseAndIsSplittable.getInt(); }
547 void makeUnsplittable() { UseAndIsSplittable.setInt(
false); }
549 Use *getUse()
const {
return UseAndIsSplittable.getPointer(); }
551 bool isDead()
const {
return getUse() ==
nullptr; }
552 void kill() { UseAndIsSplittable.setPointer(
nullptr); }
561 if (beginOffset() <
RHS.beginOffset())
563 if (beginOffset() >
RHS.beginOffset())
565 if (isSplittable() !=
RHS.isSplittable())
566 return !isSplittable();
567 if (endOffset() >
RHS.endOffset())
573 [[maybe_unused]]
friend bool operator<(
const Slice &
LHS, uint64_t RHSOffset) {
574 return LHS.beginOffset() < RHSOffset;
576 [[maybe_unused]]
friend bool operator<(uint64_t LHSOffset,
const Slice &
RHS) {
577 return LHSOffset <
RHS.beginOffset();
581 return isSplittable() ==
RHS.isSplittable() &&
582 beginOffset() ==
RHS.beginOffset() && endOffset() ==
RHS.endOffset();
597 AllocaSlices(
const DataLayout &
DL, AllocaInst &AI);
603 bool isEscaped()
const {
return PointerEscapingInstr; }
604 bool isEscapedReadOnly()
const {
return PointerEscapingInstrReadOnly; }
609 using range = iterator_range<iterator>;
611 iterator
begin() {
return Slices.begin(); }
612 iterator
end() {
return Slices.end(); }
615 using const_range = iterator_range<const_iterator>;
617 const_iterator
begin()
const {
return Slices.begin(); }
618 const_iterator
end()
const {
return Slices.end(); }
622 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
630 int OldSize = Slices.size();
631 Slices.append(NewSlices.
begin(), NewSlices.
end());
632 auto SliceI = Slices.begin() + OldSize;
633 std::stable_sort(SliceI, Slices.end());
634 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
647 return DeadUseIfPromotable;
658#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
659 void print(raw_ostream &OS, const_iterator
I, StringRef Indent =
" ")
const;
660 void printSlice(raw_ostream &OS, const_iterator
I,
661 StringRef Indent =
" ")
const;
662 void printUse(raw_ostream &OS, const_iterator
I,
663 StringRef Indent =
" ")
const;
664 void print(raw_ostream &OS)
const;
665 void dump(const_iterator
I)
const;
670 template <
typename DerivedT,
typename RetT =
void>
class BuilderBase;
673 friend class AllocaSlices::SliceBuilder;
675#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
703 SmallVector<Instruction *, 8> DeadUsers;
730 friend class AllocaSlices;
731 friend class AllocaSlices::partition_iterator;
733 using iterator = AllocaSlices::iterator;
737 uint64_t BeginOffset = 0, EndOffset = 0;
747 Partition(iterator SI) : SI(SI), SJ(SI) {}
753 uint64_t beginOffset()
const {
return BeginOffset; }
758 uint64_t endOffset()
const {
return EndOffset; }
763 uint64_t
size()
const {
764 assert(BeginOffset < EndOffset &&
"Partitions must span some bytes!");
765 return EndOffset - BeginOffset;
770 bool empty()
const {
return SI == SJ; }
781 iterator
begin()
const {
return SI; }
782 iterator
end()
const {
return SJ; }
814 AllocaSlices::iterator SE;
818 uint64_t MaxSplitSliceEndOffset = 0;
822 partition_iterator(AllocaSlices::iterator
SI, AllocaSlices::iterator SE)
834 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
835 "Cannot advance past the end of the slices!");
838 if (!
P.SplitTails.empty()) {
839 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
841 P.SplitTails.clear();
842 MaxSplitSliceEndOffset = 0;
848 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
851 return S->endOffset() == MaxSplitSliceEndOffset;
853 "Could not find the current max split slice offset!");
856 return S->endOffset() <= MaxSplitSliceEndOffset;
858 "Max split slice end offset is not actually the max!");
865 assert(P.SplitTails.empty() &&
"Failed to clear the split slices!");
875 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
876 P.SplitTails.push_back(&S);
877 MaxSplitSliceEndOffset =
878 std::max(S.endOffset(), MaxSplitSliceEndOffset);
886 P.BeginOffset = P.EndOffset;
887 P.EndOffset = MaxSplitSliceEndOffset;
894 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
895 !P.SI->isSplittable()) {
896 P.BeginOffset = P.EndOffset;
897 P.EndOffset = P.SI->beginOffset();
907 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
908 P.EndOffset = P.SI->endOffset();
913 if (!P.SI->isSplittable()) {
916 assert(P.BeginOffset == P.SI->beginOffset());
920 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
921 if (!P.SJ->isSplittable())
922 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
934 assert(P.SI->isSplittable() &&
"Forming a splittable partition!");
937 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
938 P.SJ->isSplittable()) {
939 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
946 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
947 assert(!P.SJ->isSplittable());
948 P.EndOffset = P.SJ->beginOffset();
955 "End iterators don't match between compared partition iterators!");
962 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
963 assert(P.SJ == RHS.P.SJ &&
964 "Same set of slices formed two different sized partitions!");
965 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
966 "Same slice position with differently sized non-empty split "
989 return make_range(partition_iterator(begin(), end()),
990 partition_iterator(end(), end()));
998 return SI.getOperand(1 + CI->isZero());
999 if (
SI.getOperand(1) ==
SI.getOperand(2))
1000 return SI.getOperand(1);
1009 return PN->hasConstantValue();
1040 if (VisitedDeadInsts.
insert(&
I).second)
1045 bool IsSplittable =
false) {
1051 <<
" which has zero size or starts outside of the "
1052 << AllocSize <<
" byte alloca:\n"
1053 <<
" alloca: " << AS.AI <<
"\n"
1054 <<
" use: " <<
I <<
"\n");
1055 return markAsDead(
I);
1058 uint64_t BeginOffset =
Offset.getZExtValue();
1059 uint64_t EndOffset = BeginOffset +
Size;
1067 assert(AllocSize >= BeginOffset);
1068 if (
Size > AllocSize - BeginOffset) {
1070 <<
Offset <<
" to remain within the " << AllocSize
1071 <<
" byte alloca:\n"
1072 <<
" alloca: " << AS.AI <<
"\n"
1073 <<
" use: " <<
I <<
"\n");
1074 EndOffset = AllocSize;
1077 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1080 void visitBitCastInst(BitCastInst &BC) {
1082 return markAsDead(BC);
1084 return Base::visitBitCastInst(BC);
1087 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1089 return markAsDead(ASC);
1091 return Base::visitAddrSpaceCastInst(ASC);
1094 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1096 return markAsDead(GEPI);
1098 return Base::visitGetElementPtrInst(GEPI);
1101 void handleLoadOrStore(
Type *Ty, Instruction &
I,
const APInt &
Offset,
1102 uint64_t
Size,
bool IsVolatile) {
1112 void visitLoadInst(LoadInst &LI) {
1114 "All simple FCA loads should have been pre-split");
1119 return PI.setEscapedReadOnly(&LI);
1122 if (
Size.isScalable()) {
1125 return PI.setAborted(&LI);
1134 void visitStoreInst(StoreInst &SI) {
1135 Value *ValOp =
SI.getValueOperand();
1137 return PI.setEscapedAndAborted(&SI);
1139 return PI.setAborted(&SI);
1141 TypeSize StoreSize =
DL.getTypeStoreSize(ValOp->
getType());
1143 unsigned VScale =
SI.getFunction()->getVScaleValue();
1145 return PI.setAborted(&SI);
1161 <<
Offset <<
" which extends past the end of the "
1162 << AllocSize <<
" byte alloca:\n"
1163 <<
" alloca: " << AS.AI <<
"\n"
1164 <<
" use: " << SI <<
"\n");
1165 return markAsDead(SI);
1169 "All simple FCA stores should have been pre-split");
1173 void visitMemSetInst(MemSetInst &
II) {
1174 assert(
II.getRawDest() == *U &&
"Pointer use is not the destination?");
1177 (IsOffsetKnown &&
Offset.uge(AllocSize)))
1179 return markAsDead(
II);
1182 return PI.setAborted(&
II);
1186 : AllocSize -
Offset.getLimitedValue(),
1190 void visitMemTransferInst(MemTransferInst &
II) {
1194 return markAsDead(
II);
1198 if (VisitedDeadInsts.
count(&
II))
1202 return PI.setAborted(&
II);
1209 if (
Offset.uge(AllocSize)) {
1210 auto MTPI = MemTransferSliceMap.
find(&
II);
1211 if (MTPI != MemTransferSliceMap.
end())
1212 AS.Slices[MTPI->second].kill();
1213 return markAsDead(
II);
1216 uint64_t RawOffset =
Offset.getLimitedValue();
1217 uint64_t
Size =
Length ?
Length->getLimitedValue() : AllocSize - RawOffset;
1221 if (*U ==
II.getRawDest() && *U ==
II.getRawSource()) {
1223 if (!
II.isVolatile())
1224 return markAsDead(
II);
1232 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1233 std::tie(MTPI, Inserted) =
1234 MemTransferSliceMap.
insert(std::make_pair(&
II, AS.Slices.size()));
1235 unsigned PrevIdx = MTPI->second;
1237 Slice &PrevP = AS.Slices[PrevIdx];
1241 if (!
II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1243 return markAsDead(
II);
1248 PrevP.makeUnsplittable();
1255 assert(AS.Slices[PrevIdx].getUse()->getUser() == &
II &&
1256 "Map index doesn't point back to a slice with this user.");
1262 void visitIntrinsicInst(IntrinsicInst &
II) {
1263 if (
II.isDroppable()) {
1264 AS.DeadUseIfPromotable.push_back(U);
1269 return PI.setAborted(&
II);
1271 if (
II.isLifetimeStartOrEnd()) {
1272 insertUse(
II,
Offset, AllocSize,
true);
1276 Base::visitIntrinsicInst(
II);
1279 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &
Size) {
1284 SmallPtrSet<Instruction *, 4> Visited;
1294 std::tie(UsedI,
I) =
Uses.pop_back_val();
1297 TypeSize LoadSize =
DL.getTypeStoreSize(LI->
getType());
1309 TypeSize StoreSize =
DL.getTypeStoreSize(
Op->getType());
1319 if (!
GEP->hasAllZeroIndices())
1326 for (User *U :
I->users())
1329 }
while (!
Uses.empty());
1334 void visitPHINodeOrSelectInst(Instruction &
I) {
1337 return markAsDead(
I);
1343 return PI.setAborted(&
I);
1361 AS.DeadOperands.push_back(U);
1367 return PI.setAborted(&
I);
1370 uint64_t &
Size = PHIOrSelectSizes[&
I];
1373 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&
I,
Size))
1374 return PI.setAborted(UnsafeI);
1383 if (
Offset.uge(AllocSize)) {
1384 AS.DeadOperands.push_back(U);
1391 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
1393 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1396 void visitInstruction(Instruction &
I) { PI.setAborted(&
I); }
1398 void visitCallBase(CallBase &CB) {
1404 PI.setEscapedReadOnly(&CB);
1408 Base::visitCallBase(CB);
1412AllocaSlices::AllocaSlices(
const DataLayout &
DL, AllocaInst &AI)
1414#
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
1417 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1419 SliceBuilder::PtrInfo PtrI =
PB.visitPtr(AI);
1420 if (PtrI.isEscaped() || PtrI.isAborted()) {
1423 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1424 : PtrI.getAbortingInst();
1425 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
1428 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1430 llvm::erase_if(Slices, [](
const Slice &S) {
return S.isDead(); });
1437#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1439void AllocaSlices::print(raw_ostream &OS, const_iterator
I,
1440 StringRef Indent)
const {
1441 printSlice(OS,
I, Indent);
1443 printUse(OS,
I, Indent);
1446void AllocaSlices::printSlice(raw_ostream &OS, const_iterator
I,
1447 StringRef Indent)
const {
1448 OS << Indent <<
"[" <<
I->beginOffset() <<
"," <<
I->endOffset() <<
")"
1449 <<
" slice #" << (
I -
begin())
1450 << (
I->isSplittable() ?
" (splittable)" :
"");
1453void AllocaSlices::printUse(raw_ostream &OS, const_iterator
I,
1454 StringRef Indent)
const {
1455 OS << Indent <<
" used by: " << *
I->getUse()->getUser() <<
"\n";
1458void AllocaSlices::print(raw_ostream &OS)
const {
1459 if (PointerEscapingInstr) {
1460 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1461 <<
" A pointer to this alloca escaped by:\n"
1462 <<
" " << *PointerEscapingInstr <<
"\n";
1466 if (PointerEscapingInstrReadOnly)
1467 OS <<
"Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly <<
"\n";
1469 OS <<
"Slices of alloca: " << AI <<
"\n";
1483static std::pair<Type *, IntegerType *>
1487 bool TyIsCommon =
true;
1492 for (AllocaSlices::const_iterator
I =
B;
I !=
E; ++
I) {
1493 Use *U =
I->getUse();
1496 if (
I->beginOffset() !=
B->beginOffset() ||
I->endOffset() != EndOffset)
1499 Type *UserTy =
nullptr;
1503 UserTy =
SI->getValueOperand()->getType();
1511 if (UserITy->getBitWidth() % 8 != 0 ||
1512 UserITy->getBitWidth() / 8 > (EndOffset -
B->beginOffset()))
1517 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1523 if (!UserTy || (Ty && Ty != UserTy))
1529 return {TyIsCommon ? Ty :
nullptr, ITy};
1560 Type *LoadType =
nullptr;
1573 if (LoadType != LI->
getType())
1582 if (BBI->mayWriteToMemory())
1585 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1592 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1629 IRB.SetInsertPoint(&PN);
1631 PN.
getName() +
".sroa.speculated");
1661 IRB.SetInsertPoint(TI);
1663 LoadInst *Load = IRB.CreateAlignedLoad(
1664 LoadTy, InVal, Alignment,
1665 (PN.
getName() +
".sroa.speculate.load." + Pred->getName()));
1666 ++NumLoadsSpeculated;
1668 Load->setAAMetadata(AATags);
1670 InjectedLoads[Pred] = Load;
1677SelectHandSpeculativity &
1678SelectHandSpeculativity::setAsSpeculatable(
bool isTrueVal) {
1686bool SelectHandSpeculativity::isSpeculatable(
bool isTrueVal)
const {
1691bool SelectHandSpeculativity::areAllSpeculatable()
const {
1692 return isSpeculatable(
true) &&
1693 isSpeculatable(
false);
1696bool SelectHandSpeculativity::areAnySpeculatable()
const {
1697 return isSpeculatable(
true) ||
1698 isSpeculatable(
false);
1700bool SelectHandSpeculativity::areNoneSpeculatable()
const {
1701 return !areAnySpeculatable();
1704static SelectHandSpeculativity
1707 SelectHandSpeculativity
Spec;
1713 Spec.setAsSpeculatable(
Value ==
SI.getTrueValue());
1720std::optional<RewriteableMemOps>
1721SROA::isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG) {
1722 RewriteableMemOps
Ops;
1724 for (User *U :
SI.users()) {
1734 Ops.emplace_back(Store);
1745 PossiblySpeculatableLoad
Load(LI);
1751 Ops.emplace_back(Load);
1755 SelectHandSpeculativity Spec =
1761 Ops.emplace_back(Load);
1771 Value *TV =
SI.getTrueValue();
1772 Value *FV =
SI.getFalseValue();
1777 IRB.SetInsertPoint(&LI);
1781 LI.
getName() +
".sroa.speculate.load.true");
1784 LI.
getName() +
".sroa.speculate.load.false");
1785 NumLoadsSpeculated += 2;
1797 Value *V = IRB.CreateSelect(
SI.getCondition(), TL, FL,
1798 LI.
getName() +
".sroa.speculated",
1805template <
typename T>
1807 SelectHandSpeculativity
Spec,
1814 if (
Spec.areNoneSpeculatable())
1816 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1819 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1821 if (
Spec.isSpeculatable(
true))
1832 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1833 int SuccIdx = IsThen ? 0 : 1;
1834 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1835 auto &CondMemOp =
cast<T>(*
I.clone());
1836 if (NewMemOpBB != Head) {
1837 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1839 ++NumLoadsPredicated;
1841 ++NumStoresPredicated;
1843 CondMemOp.dropUBImplyingAttrsAndMetadata();
1844 ++NumLoadsSpeculated;
1846 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1847 Value *Ptr =
SI.getOperand(1 + SuccIdx);
1848 CondMemOp.setOperand(
I.getPointerOperandIndex(), Ptr);
1850 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1858 I.replaceAllUsesWith(PN);
1863 SelectHandSpeculativity
Spec,
1874 const RewriteableMemOps &
Ops,
1876 bool CFGChanged =
false;
1879 for (
const RewriteableMemOp &
Op :
Ops) {
1880 SelectHandSpeculativity
Spec;
1882 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&
Op)) {
1885 auto PSL = std::get<PossiblySpeculatableLoad>(
Op);
1886 I = PSL.getPointer();
1887 Spec = PSL.getInt();
1889 if (
Spec.areAllSpeculatable()) {
1892 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1896 I->eraseFromParent();
1901 SI.eraseFromParent();
1909 const Twine &NamePrefix) {
1911 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(
Offset),
1912 NamePrefix +
"sroa_idx");
1913 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
PointerTy,
1914 NamePrefix +
"sroa_cast");
1929 unsigned VScale = 0) {
1939 "We can't have the same bitwidth for different int types");
1943 TypeSize NewSize =
DL.getTypeSizeInBits(NewTy);
1944 TypeSize OldSize =
DL.getTypeSizeInBits(OldTy);
1971 if (NewSize != OldSize)
1987 return OldAS == NewAS ||
1988 (!
DL.isNonIntegralAddressSpace(OldAS) &&
1989 !
DL.isNonIntegralAddressSpace(NewAS) &&
1990 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1996 return !
DL.isNonIntegralPointerType(NewTy);
2000 if (!
DL.isNonIntegralPointerType(OldTy))
2023 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
2024 uint64_t BeginIndex = BeginOffset / ElementSize;
2025 if (BeginIndex * ElementSize != BeginOffset ||
2028 uint64_t EndOffset = std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
2029 uint64_t EndIndex = EndOffset / ElementSize;
2030 if (EndIndex * ElementSize != EndOffset ||
2034 assert(EndIndex > BeginIndex &&
"Empty vector!");
2035 uint64_t NumElements = EndIndex - BeginIndex;
2036 Type *SliceTy = (NumElements == 1)
2037 ? Ty->getElementType()
2043 Use *U = S.getUse();
2046 if (
MI->isVolatile())
2048 if (!S.isSplittable())
2051 if (!
II->isLifetimeStartOrEnd() && !
II->isDroppable())
2058 if (LTy->isStructTy())
2060 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2061 assert(LTy->isIntegerTy());
2067 if (
SI->isVolatile())
2069 Type *STy =
SI->getValueOperand()->getType();
2073 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2093 bool HaveCommonEltTy,
Type *CommonEltTy,
2094 bool HaveVecPtrTy,
bool HaveCommonVecPtrTy,
2095 VectorType *CommonVecPtrTy,
unsigned VScale) {
2097 if (CandidateTys.
empty())
2104 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2108 if (!HaveCommonEltTy && HaveVecPtrTy) {
2110 CandidateTys.
clear();
2112 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2115 if (!VTy->getElementType()->isIntegerTy())
2117 VTy->getContext(), VTy->getScalarSizeInBits())));
2124 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2125 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2126 "Cannot have vector types of different sizes!");
2127 assert(RHSTy->getElementType()->isIntegerTy() &&
2128 "All non-integer types eliminated!");
2129 assert(LHSTy->getElementType()->isIntegerTy() &&
2130 "All non-integer types eliminated!");
2136 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2137 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2138 "Cannot have vector types of different sizes!");
2139 assert(RHSTy->getElementType()->isIntegerTy() &&
2140 "All non-integer types eliminated!");
2141 assert(LHSTy->getElementType()->isIntegerTy() &&
2142 "All non-integer types eliminated!");
2146 llvm::sort(CandidateTys, RankVectorTypesComp);
2147 CandidateTys.erase(
llvm::unique(CandidateTys, RankVectorTypesEq),
2148 CandidateTys.end());
2154 assert(VTy->getElementType() == CommonEltTy &&
2155 "Unaccounted for element type!");
2156 assert(VTy == CandidateTys[0] &&
2157 "Different vector types with the same element type!");
2160 CandidateTys.resize(1);
2167 std::numeric_limits<unsigned short>::max();
2173 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2177 if (ElementSize % 8)
2179 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2180 "vector size not a multiple of element size?");
2183 for (
const Slice &S :
P)
2187 for (
const Slice *S :
P.splitSliceTails())
2193 return VTy != CandidateTys.
end() ? *VTy :
nullptr;
2200 bool &HaveCommonEltTy,
Type *&CommonEltTy,
bool &HaveVecPtrTy,
2201 bool &HaveCommonVecPtrTy,
VectorType *&CommonVecPtrTy,
unsigned VScale) {
2203 CandidateTysCopy.
size() ? CandidateTysCopy[0] :
nullptr;
2206 for (
Type *Ty : OtherTys) {
2209 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2212 for (
VectorType *
const VTy : CandidateTysCopy) {
2214 assert(CandidateTysCopy[0] == OriginalElt &&
"Different Element");
2215 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2216 unsigned ElementSize =
2217 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2221 CheckCandidateType(NewVTy);
2227 P,
DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2228 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2247 Type *CommonEltTy =
nullptr;
2249 bool HaveVecPtrTy =
false;
2250 bool HaveCommonEltTy =
true;
2251 bool HaveCommonVecPtrTy =
true;
2252 auto CheckCandidateType = [&](
Type *Ty) {
2255 if (!CandidateTys.
empty()) {
2257 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2258 DL.getTypeSizeInBits(V).getFixedValue()) {
2259 CandidateTys.
clear();
2264 Type *EltTy = VTy->getElementType();
2267 CommonEltTy = EltTy;
2268 else if (CommonEltTy != EltTy)
2269 HaveCommonEltTy =
false;
2272 HaveVecPtrTy =
true;
2273 if (!CommonVecPtrTy)
2274 CommonVecPtrTy = VTy;
2275 else if (CommonVecPtrTy != VTy)
2276 HaveCommonVecPtrTy =
false;
2282 for (
const Slice &S :
P) {
2287 Ty =
SI->getValueOperand()->getType();
2291 auto CandTy = Ty->getScalarType();
2292 if (CandTy->isPointerTy() && (S.beginOffset() !=
P.beginOffset() ||
2293 S.endOffset() !=
P.endOffset())) {
2300 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2301 CheckCandidateType(Ty);
2306 LoadStoreTys, CandidateTysCopy, CheckCandidateType,
P,
DL,
2307 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2308 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2311 CandidateTys.
clear();
2313 DeferredTys, CandidateTysCopy, CheckCandidateType,
P,
DL, CandidateTys,
2314 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2315 CommonVecPtrTy, VScale);
2326 bool &WholeAllocaOp) {
2329 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2330 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2332 Use *U = S.getUse();
2339 if (
II->isLifetimeStartOrEnd() ||
II->isDroppable())
2357 if (S.beginOffset() < AllocBeginOffset)
2363 WholeAllocaOp =
true;
2365 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2367 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2374 Type *ValueTy =
SI->getValueOperand()->getType();
2375 if (
SI->isVolatile())
2378 TypeSize StoreSize =
DL.getTypeStoreSize(ValueTy);
2383 if (S.beginOffset() < AllocBeginOffset)
2389 WholeAllocaOp =
true;
2391 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2393 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2402 if (!S.isSplittable())
2419 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2425 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2443 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2445 for (
const Slice &S :
P)
2450 for (
const Slice *S :
P.splitSliceTails())
2455 return WholeAllocaOp;
2460 const Twine &Name) {
2464 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2465 "Element extends past full value");
2467 if (
DL.isBigEndian())
2468 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2469 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2471 V = IRB.CreateLShr(V, ShAmt, Name +
".shift");
2474 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2475 "Cannot extract to a larger integer!");
2477 V = IRB.CreateTrunc(V, Ty, Name +
".trunc");
2487 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2488 "Cannot insert a larger integer!");
2491 V = IRB.CreateZExt(V, IntTy, Name +
".ext");
2495 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2496 "Element store outside of alloca store");
2498 if (
DL.isBigEndian())
2499 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2500 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2502 V = IRB.CreateShl(V, ShAmt, Name +
".shift");
2506 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2507 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2508 Old = IRB.CreateAnd(Old, Mask, Name +
".mask");
2510 V = IRB.CreateOr(Old, V, Name +
".insert");
2517 unsigned EndIndex,
const Twine &Name) {
2519 unsigned NumElements = EndIndex - BeginIndex;
2522 if (NumElements == VecTy->getNumElements())
2525 if (NumElements == 1) {
2526 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2533 V = IRB.CreateShuffleVector(V, Mask, Name +
".extract");
2539 unsigned BeginIndex,
const Twine &Name) {
2541 assert(VecTy &&
"Can only insert a vector into a vector");
2546 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2555 assert(NumSubElements <= NumElements &&
"Too many elements!");
2556 if (NumSubElements == NumElements) {
2557 assert(V->getType() == VecTy &&
"Vector type mismatch");
2560 unsigned EndIndex = BeginIndex + NumSubElements;
2567 Mask.reserve(NumElements);
2568 for (
unsigned Idx = 0; Idx != NumElements; ++Idx)
2569 if (Idx >= BeginIndex && Idx < EndIndex)
2570 Mask.push_back(Idx - BeginIndex);
2573 V = IRB.CreateShuffleVector(V, Mask, Name +
".expand");
2577 for (
unsigned Idx = 0; Idx != NumElements; ++Idx)
2578 if (Idx >= BeginIndex && Idx < EndIndex)
2579 Mask.push_back(Idx);
2581 Mask.push_back(Idx + NumElements);
2582 V = IRB.CreateShuffleVector(V, Old, Mask, Name +
"blend");
2621 const char *DebugName) {
2622 Type *EltType = VecType->getElementType();
2623 if (EltType != NewAIEltTy) {
2625 unsigned TotalBits =
2626 VecType->getNumElements() *
DL.getTypeSizeInBits(EltType);
2627 unsigned NewNumElts = TotalBits /
DL.getTypeSizeInBits(NewAIEltTy);
2630 V = Builder.CreateBitCast(V, NewVecType);
2631 VecType = NewVecType;
2632 LLVM_DEBUG(
dbgs() <<
" bitcast " << DebugName <<
": " << *V <<
"\n");
2636 BitcastIfNeeded(V0, VecType0,
"V0");
2637 BitcastIfNeeded(V1, VecType1,
"V1");
2639 unsigned NumElts0 = VecType0->getNumElements();
2640 unsigned NumElts1 = VecType1->getNumElements();
2644 if (NumElts0 == NumElts1) {
2645 for (
unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2646 ShuffleMask.push_back(i);
2650 unsigned SmallSize = std::min(NumElts0, NumElts1);
2651 unsigned LargeSize = std::max(NumElts0, NumElts1);
2652 bool IsV0Smaller = NumElts0 < NumElts1;
2653 Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
2655 for (
unsigned i = 0; i < SmallSize; ++i)
2657 for (
unsigned i = SmallSize; i < LargeSize; ++i)
2659 ExtendedVec = Builder.CreateShuffleVector(
2661 LLVM_DEBUG(
dbgs() <<
" shufflevector: " << *ExtendedVec <<
"\n");
2662 for (
unsigned i = 0; i < NumElts0; ++i)
2663 ShuffleMask.push_back(i);
2664 for (
unsigned i = 0; i < NumElts1; ++i)
2665 ShuffleMask.push_back(LargeSize + i);
2668 return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
2679class AllocaSliceRewriter :
public InstVisitor<AllocaSliceRewriter, bool> {
2681 friend class InstVisitor<AllocaSliceRewriter, bool>;
2683 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2685 const DataLayout &
DL;
2688 AllocaInst &OldAI, &NewAI;
2689 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2709 uint64_t ElementSize;
2713 uint64_t BeginOffset = 0;
2714 uint64_t EndOffset = 0;
2718 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2720 uint64_t SliceSize = 0;
2721 bool IsSplittable =
false;
2722 bool IsSplit =
false;
2723 Use *OldUse =
nullptr;
2727 SmallSetVector<PHINode *, 8> &PHIUsers;
2728 SmallSetVector<SelectInst *, 8> &SelectUsers;
// Return the address of the new alloca, cast into the requested address
// space so it can be used by an access in that space.
// NOTE(review): extraction gap — original lines 2737-2739 are missing from
// this view (IsVolatile is unused in the visible code; presumably the
// missing lines handle the volatile/no-cast case — confirm against the
// full source). Code below is kept byte-identical.
2736 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
// Opaque pointer type in the target address space; cast &NewAI into it.
2740 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2741 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2745 AllocaSliceRewriter(
const DataLayout &
DL, AllocaSlices &AS, SROA &
Pass,
2746 AllocaInst &OldAI, AllocaInst &NewAI,
Type *NewAllocaTy,
2747 uint64_t NewAllocaBeginOffset,
2748 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2749 VectorType *PromotableVecTy,
2750 SmallSetVector<PHINode *, 8> &PHIUsers,
2751 SmallSetVector<SelectInst *, 8> &SelectUsers)
2752 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2753 NewAllocaBeginOffset(NewAllocaBeginOffset),
2754 NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAllocaTy),
2755 IntTy(IsIntegerPromotable
2758 DL.getTypeSizeInBits(NewAllocaTy).getFixedValue())
2760 VecTy(PromotableVecTy),
2761 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2762 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2764 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2767 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2768 "Only multiple-of-8 sized vector elements are viable");
2771 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2774 bool visit(AllocaSlices::const_iterator
I) {
2775 bool CanSROA =
true;
2776 BeginOffset =
I->beginOffset();
2777 EndOffset =
I->endOffset();
2778 IsSplittable =
I->isSplittable();
2780 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2781 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2786 assert(BeginOffset < NewAllocaEndOffset);
2787 assert(EndOffset > NewAllocaBeginOffset);
2788 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2789 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2791 SliceSize = NewEndOffset - NewBeginOffset;
2792 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2793 <<
") NewBegin:(" << NewBeginOffset <<
", "
2794 << NewEndOffset <<
") NewAllocaBegin:("
2795 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
2797 assert(IsSplit || NewBeginOffset == BeginOffset);
2798 OldUse =
I->getUse();
2802 IRB.SetInsertPoint(OldUserI);
2803 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
2805 if (!IRB.getContext().shouldDiscardValueNames())
2806 IRB.getInserter().SetNamePrefix(Twine(NewAI.
getName()) +
"." +
2807 Twine(BeginOffset) +
".");
2853 std::optional<SmallVector<Value *, 4>>
2854 rewriteTreeStructuredMerge(Partition &
P) {
2856 if (
P.splitSliceTails().size() > 0)
2857 return std::nullopt;
2859 SmallVector<Value *, 4> DeletedValues;
2860 LoadInst *TheLoad =
nullptr;
2865 uint64_t BeginOffset;
2868 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End,
Value *Val)
2869 :
Store(
SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
2876 Type *AllocatedEltTy =
2880 unsigned AllocatedEltTySize =
DL.getTypeSizeInBits(AllocatedEltTy);
2887 auto IsTypeValidForTreeStructuredMerge = [&](
Type *Ty) ->
bool {
2889 return FixedVecTy &&
2890 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
2891 !FixedVecTy->getElementType()->isPointerTy();
2894 for (Slice &S :
P) {
2902 if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->
getType()) ||
2903 S.beginOffset() != NewAllocaBeginOffset ||
2904 S.endOffset() != NewAllocaEndOffset || LI->
isVolatile())
2905 return std::nullopt;
2913 if (!IsTypeValidForTreeStructuredMerge(
2914 SI->getValueOperand()->getType()) ||
2916 return std::nullopt;
2918 unsigned NumElts = VecTy->getNumElements();
2919 unsigned EltSize =
DL.getTypeSizeInBits(VecTy->getElementType());
2920 if (NumElts * EltSize % AllocatedEltTySize != 0)
2921 return std::nullopt;
2922 StoreInfos.
emplace_back(SI, S.beginOffset(), S.endOffset(),
2923 SI->getValueOperand());
2927 return std::nullopt;
2932 return std::nullopt;
2935 if (StoreInfos.
size() < 2)
2936 return std::nullopt;
2940 llvm::sort(StoreInfos, [](
const StoreInfo &
A,
const StoreInfo &
B) {
2941 return A.BeginOffset <
B.BeginOffset;
2945 uint64_t ExpectedStart = NewAllocaBeginOffset;
2946 for (
auto &StoreInfo : StoreInfos) {
2947 uint64_t BeginOff = StoreInfo.BeginOffset;
2948 uint64_t EndOff = StoreInfo.EndOffset;
2951 if (BeginOff != ExpectedStart)
2952 return std::nullopt;
2954 ExpectedStart = EndOff;
2957 if (ExpectedStart != NewAllocaEndOffset)
2958 return std::nullopt;
2969 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
2971 for (
auto &StoreInfo : StoreInfos) {
2972 if (StoreInfo.Store->getParent() != StoreBB)
2973 return std::nullopt;
2974 if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
2975 return std::nullopt;
2981 dbgs() <<
"Tree structured merge rewrite:\n Load: " << *TheLoad
2982 <<
"\n Ordered stores:\n";
2983 for (
auto [i, Info] :
enumerate(StoreInfos))
2984 dbgs() <<
" [" << i <<
"] Range[" <<
Info.BeginOffset <<
", "
2985 <<
Info.EndOffset <<
") \tStore: " << *
Info.Store
2986 <<
"\tValue: " << *
Info.StoredValue <<
"\n";
2991 std::queue<Value *> VecElements;
3000 for (
const auto &Info : StoreInfos) {
3002 VecElements.push(
Info.StoredValue);
3006 while (VecElements.size() > 1) {
3007 const auto NumElts = VecElements.size();
3008 for ([[maybe_unused]]
const auto _ :
llvm::seq(NumElts / 2)) {
3009 Value *V0 = VecElements.front();
3011 Value *V1 = VecElements.front();
3015 VecElements.push(Merged);
3017 if (NumElts % 2 == 1) {
3018 Value *
V = VecElements.front();
3020 VecElements.push(V);
3025 Value *MergedValue = VecElements.front();
3026 Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
3031 TheLoad->
getName() +
".sroa.new.load"));
3034 return DeletedValues;
3042 bool visitInstruction(Instruction &
I) {
3050 assert(IsSplit || BeginOffset == NewBeginOffset);
3051 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3053 StringRef OldName = OldPtr->
getName();
3055 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
3057 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
3062 OldName = OldName.
substr(IndexEnd + 1);
3066 OldName = OldName.
substr(OffsetEnd + 1);
3070 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
3082 Align getSliceAlign() {
3084 NewBeginOffset - NewAllocaBeginOffset);
// Map an absolute byte offset within the original alloca to an element
// index of the promotable vector type (VecTy). Only meaningful when this
// rewriter is operating in vector mode.
// NOTE(review): extraction gap — the trailing `return` and closing brace
// (original lines 3093-3094) are missing from this view. Code below is
// kept byte-identical.
3087 unsigned getIndex(uint64_t
Offset) {
3088 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
// Offsets are absolute; rebase against the new alloca's begin offset.
3089 uint64_t RelOffset =
Offset - NewAllocaBeginOffset;
3090 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
3091 uint32_t
Index = RelOffset / ElementSize;
// The offset must land exactly on an element boundary.
3092 assert(Index * ElementSize == RelOffset);
// Queue a value for deletion by the pass once it has no remaining uses.
// NOTE(review): extraction gap — original lines 3097-3098 are missing
// here; `I` is only bound in that missing code (presumably a cast of V to
// Instruction guarded by a triviality check — confirm against the full
// source). Code below is kept byte-identical.
3096 void deleteIfTriviallyDead(
Value *V) {
// Deletion is deferred: dead instructions are collected on the pass and
// erased later, rather than erased inline during rewriting.
3099 Pass.DeadInsts.push_back(
I);
// Rewrite a load of a slice of the alloca as a full-width load of the new
// (vector-typed) alloca followed by extraction of the covered elements.
// NOTE(review): extraction gaps — original lines 3106-3107 and 3109 (the
// declaration binding `Load`) are missing from this view. Code below is
// kept byte-identical.
3102 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
// Translate the slice's byte range into a [BeginIndex, EndIndex) element
// range of the promotable vector.
3103 unsigned BeginIndex = getIndex(NewBeginOffset);
3104 unsigned EndIndex = getIndex(NewEndOffset);
3105 assert(EndIndex > BeginIndex &&
"Empty vector!");
// Load the whole new alloca at its natural alignment...
3108 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
// ...carrying over only loop-parallelism metadata, which stays valid on
// the widened access.
3110 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3111 LLVMContext::MD_access_group});
// Shuffle/extract out just the elements this slice covers.
3112 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
3115 Value *rewriteIntegerLoad(LoadInst &LI) {
3116 assert(IntTy &&
"We cannot insert an integer to the alloca");
3119 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3120 V = IRB.CreateBitPreservingCastChain(
DL, V, IntTy);
3121 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3122 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3123 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3124 IntegerType *ExtractTy = Type::getIntNTy(LI.
getContext(), SliceSize * 8);
3133 "Can only handle an extract for an overly wide load");
3135 V = IRB.CreateZExt(V, LI.
getType());
3139 bool visitLoadInst(LoadInst &LI) {
3148 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.
getContext(), SliceSize * 8)
3150 bool IsPtrAdjusted =
false;
3153 V = rewriteVectorizedLoadInst(LI);
3155 V = rewriteIntegerLoad(LI);
3156 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
3157 NewEndOffset == NewAllocaEndOffset &&
3160 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3163 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3164 LoadInst *NewLI = IRB.CreateAlignedLoad(
3165 NewAllocaTy, NewPtr, NewAI.getAlign(), LI.isVolatile(), LI.getName());
3166 if (LI.isVolatile())
3167 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3168 if (NewLI->isAtomic())
3169 NewLI->setAlignment(LI.getAlign());
3174 copyMetadataForLoad(*NewLI, LI);
3178 NewLI->setAAMetadata(AATags.adjustForAccess(
3179 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3187 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3188 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3189 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3190 V = IRB.CreateZExt(V, TITy,
"load.ext");
3191 if (DL.isBigEndian())
3192 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3196 Type *LTy = IRB.getPtrTy(AS);
3198 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3203 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
3207 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3208 LLVMContext::MD_access_group});
3211 IsPtrAdjusted =
true;
3213 V = IRB.CreateBitPreservingCastChain(
DL, V, TargetTy);
3218 "Only integer type loads and stores are split");
3219 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
3220 "Split load isn't smaller than original load");
3222 "Non-byte-multiple bit width");
3228 LIIt.setHeadBit(
true);
3229 IRB.SetInsertPoint(LI.
getParent(), LIIt);
3234 Value *Placeholder =
3240 Placeholder->replaceAllUsesWith(&LI);
3241 Placeholder->deleteValue();
3246 Pass.DeadInsts.push_back(&LI);
3247 deleteIfTriviallyDead(OldOp);
3252 bool rewriteVectorizedStoreInst(
Value *V, StoreInst &SI,
Value *OldOp,
3257 if (
V->getType() != VecTy) {
3258 unsigned BeginIndex = getIndex(NewBeginOffset);
3259 unsigned EndIndex = getIndex(NewEndOffset);
3260 assert(EndIndex > BeginIndex &&
"Empty vector!");
3261 unsigned NumElements = EndIndex - BeginIndex;
3263 "Too many elements!");
3264 Type *SliceTy = (NumElements == 1)
3266 : FixedVectorType::
get(ElementTy, NumElements);
3267 if (
V->getType() != SliceTy)
3268 V = IRB.CreateBitPreservingCastChain(
DL, V, SliceTy);
3272 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3275 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3276 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3277 LLVMContext::MD_access_group});
3281 Pass.DeadInsts.push_back(&SI);
3285 Store,
Store->getPointerOperand(), OrigV,
DL);
3290 bool rewriteIntegerStore(
Value *V, StoreInst &SI, AAMDNodes AATags) {
3291 assert(IntTy &&
"We cannot extract an integer from the alloca");
3293 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
3295 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3297 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3298 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3299 uint64_t
Offset = BeginOffset - NewAllocaBeginOffset;
3302 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3303 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3304 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3305 LLVMContext::MD_access_group});
3311 Store,
Store->getPointerOperand(),
3312 Store->getValueOperand(),
DL);
3314 Pass.DeadInsts.push_back(&SI);
3319 bool visitStoreInst(StoreInst &SI) {
3321 Value *OldOp =
SI.getOperand(1);
3324 AAMDNodes AATags =
SI.getAAMetadata();
3329 if (
V->getType()->isPointerTy())
3331 Pass.PostPromotionWorklist.insert(AI);
3333 TypeSize StoreSize =
DL.getTypeStoreSize(
V->getType());
3336 assert(
V->getType()->isIntegerTy() &&
3337 "Only integer type loads and stores are split");
3338 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
3339 "Non-byte-multiple bit width");
3340 IntegerType *NarrowTy = Type::getIntNTy(
SI.getContext(), SliceSize * 8);
3346 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3347 if (IntTy &&
V->getType()->isIntegerTy())
3348 return rewriteIntegerStore(V, SI, AATags);
3351 if (NewBeginOffset == NewAllocaBeginOffset &&
3352 NewEndOffset == NewAllocaEndOffset &&
3354 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3356 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
3359 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
3361 unsigned AS =
SI.getPointerAddressSpace();
3362 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3364 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
3366 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3367 LLVMContext::MD_access_group});
3371 if (
SI.isVolatile())
3380 Pass.DeadInsts.push_back(&SI);
3381 deleteIfTriviallyDead(OldOp);
3399 assert(
Size > 0 &&
"Expected a positive number of bytes.");
3407 IRB.CreateZExt(V, SplatIntTy,
"zext"),
3417 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
3422 bool visitMemSetInst(MemSetInst &
II) {
3426 AAMDNodes AATags =
II.getAAMetadata();
3432 assert(NewBeginOffset == BeginOffset);
3433 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->
getType()));
3434 II.setDestAlignment(getSliceAlign());
3439 "AT: Unexpected link to non-const GEP");
3440 deleteIfTriviallyDead(OldPtr);
3445 Pass.DeadInsts.push_back(&
II);
3449 const bool CanContinue = [&]() {
3452 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3456 const uint64_t
Len =
C->getLimitedValue();
3457 if (Len > std::numeric_limits<unsigned>::max())
3459 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.
getContext());
3462 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3468 Type *SizeTy =
II.getLength()->getType();
3469 unsigned Sz = NewEndOffset - NewBeginOffset;
3472 getNewAllocaSlicePtr(IRB, OldPtr->
getType()),
II.getValue(),
Size,
3473 MaybeAlign(getSliceAlign()),
II.isVolatile()));
3479 New,
New->getRawDest(),
nullptr,
DL);
3494 assert(ElementTy == ScalarTy);
3496 unsigned BeginIndex = getIndex(NewBeginOffset);
3497 unsigned EndIndex = getIndex(NewEndOffset);
3498 assert(EndIndex > BeginIndex &&
"Empty vector!");
3499 unsigned NumElements = EndIndex - BeginIndex;
3501 "Too many elements!");
3504 II.getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3505 Splat = IRB.CreateBitPreservingCastChain(
DL,
Splat, ElementTy);
3506 if (NumElements > 1)
3509 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3517 uint64_t
Size = NewEndOffset - NewBeginOffset;
3518 V = getIntegerSplat(
II.getValue(),
Size);
3520 if (IntTy && (NewBeginOffset != NewAllocaBeginOffset ||
3521 NewEndOffset != NewAllocaEndOffset)) {
3522 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI,
3524 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3525 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3528 assert(
V->getType() == IntTy &&
3529 "Wrong type for an alloca wide integer!");
3531 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3534 assert(NewBeginOffset == NewAllocaBeginOffset);
3535 assert(NewEndOffset == NewAllocaEndOffset);
3537 V = getIntegerSplat(
II.getValue(),
3538 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3543 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3546 Value *NewPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3548 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
II.isVolatile());
3549 New->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3550 LLVMContext::MD_access_group});
3556 New,
New->getPointerOperand(), V,
DL);
3559 return !
II.isVolatile();
3562 bool visitMemTransferInst(MemTransferInst &
II) {
3568 AAMDNodes AATags =
II.getAAMetadata();
3570 bool IsDest = &
II.getRawDestUse() == OldUse;
3571 assert((IsDest &&
II.getRawDest() == OldPtr) ||
3572 (!IsDest &&
II.getRawSource() == OldPtr));
3574 Align SliceAlign = getSliceAlign();
3582 if (!IsSplittable) {
3583 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3588 DbgAssign->getAddress() ==
II.getDest())
3589 DbgAssign->replaceVariableLocationOp(
II.getDest(), AdjustedPtr);
3591 II.setDest(AdjustedPtr);
3592 II.setDestAlignment(SliceAlign);
3594 II.setSource(AdjustedPtr);
3595 II.setSourceAlignment(SliceAlign);
3599 deleteIfTriviallyDead(OldPtr);
3612 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3613 SliceSize !=
DL.getTypeStoreSize(NewAllocaTy).getFixedValue() ||
3614 !
DL.typeSizeEqualsStoreSize(NewAllocaTy) ||
3620 if (EmitMemCpy && &OldAI == &NewAI) {
3622 assert(NewBeginOffset == BeginOffset);
3625 if (NewEndOffset != EndOffset)
3626 II.setLength(NewEndOffset - NewBeginOffset);
3630 Pass.DeadInsts.push_back(&
II);
3634 Value *OtherPtr = IsDest ?
II.getRawSource() :
II.getRawDest();
3635 if (AllocaInst *AI =
3637 assert(AI != &OldAI && AI != &NewAI &&
3638 "Splittable transfers cannot reach the same alloca on both ends.");
3639 Pass.Worklist.insert(AI);
3646 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3647 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3649 (IsDest ?
II.getSourceAlign() :
II.getDestAlign()).valueOrOne();
3651 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3659 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3660 Type *SizeTy =
II.getLength()->getType();
3661 Constant *
Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3663 Value *DestPtr, *SrcPtr;
3664 MaybeAlign DestAlign, SrcAlign;
3668 DestAlign = SliceAlign;
3670 SrcAlign = OtherAlign;
3673 DestAlign = OtherAlign;
3675 SrcAlign = SliceAlign;
3677 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3680 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3685 &
II, New, DestPtr,
nullptr,
DL);
3690 SliceSize * 8, &
II, New, DestPtr,
nullptr,
DL);
3696 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3697 NewEndOffset == NewAllocaEndOffset;
3698 uint64_t
Size = NewEndOffset - NewBeginOffset;
3699 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3700 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3701 unsigned NumElements = EndIndex - BeginIndex;
3702 IntegerType *SubIntTy =
3703 IntTy ? Type::getIntNTy(IntTy->
getContext(),
Size * 8) : nullptr;
3708 if (VecTy && !IsWholeAlloca) {
3709 if (NumElements == 1)
3710 OtherTy = VecTy->getElementType();
3713 }
else if (IntTy && !IsWholeAlloca) {
3716 OtherTy = NewAllocaTy;
3721 MaybeAlign SrcAlign = OtherAlign;
3722 MaybeAlign DstAlign = SliceAlign;
3730 DstPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3734 SrcPtr = getPtrToNewAI(
II.getSourceAddressSpace(),
II.isVolatile());
3738 if (VecTy && !IsWholeAlloca && !IsDest) {
3740 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3742 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3744 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3745 Src = IRB.CreateBitPreservingCastChain(
DL, Src, IntTy);
3746 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3749 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3750 II.isVolatile(),
"copyload");
3751 Load->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3752 LLVMContext::MD_access_group});
3759 if (VecTy && !IsWholeAlloca && IsDest) {
3760 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3763 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3764 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3766 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3767 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3769 Src = IRB.CreateBitPreservingCastChain(
DL, Src, NewAllocaTy);
3773 IRB.CreateAlignedStore(Src, DstPtr, DstAlign,
II.isVolatile()));
3774 Store->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3775 LLVMContext::MD_access_group});
3778 Src->getType(),
DL));
3784 Store, DstPtr, Src,
DL);
3789 &
II, Store, DstPtr, Src,
DL);
3793 return !
II.isVolatile();
3796 bool visitIntrinsicInst(IntrinsicInst &
II) {
3797 assert((
II.isLifetimeStartOrEnd() ||
II.isDroppable()) &&
3798 "Unexpected intrinsic!");
3802 Pass.DeadInsts.push_back(&
II);
3804 if (
II.isDroppable()) {
3805 assert(
II.getIntrinsicID() == Intrinsic::assume &&
"Expected assume");
3811 assert(
II.getArgOperand(0) == OldPtr);
3815 if (
II.getIntrinsicID() == Intrinsic::lifetime_start)
3816 New = IRB.CreateLifetimeStart(Ptr);
3818 New = IRB.CreateLifetimeEnd(Ptr);
// Walk the users reachable from Root with a worklist and clamp the
// alignment of the loads/stores found there down to the slice alignment,
// since the rewritten pointer may be less aligned than the original.
// NOTE(review): extraction gaps — the worklist pop, the load-handling arm
// (original lines ~3834-3841), and the pointer-operand filter inside the
// user loop are missing from this view. Code below is kept byte-identical.
3826 void fixLoadStoreAlign(Instruction &Root) {
// Visited guards against revisiting users; Uses is the pending worklist.
3830 SmallPtrSet<Instruction *, 4> Visited;
3831 SmallVector<Instruction *, 4>
Uses;
3833 Uses.push_back(&Root);
// Stores: never increase alignment, only reduce it to the slice's.
3842 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
// Propagate through non-memory users so loads/stores further along the
// chain are also fixed up.
3849 for (User *U :
I->users())
3852 }
while (!
Uses.empty());
// Rewrite a PHI that uses the old alloca pointer to use a pointer into
// the new alloca slice instead. PHIs cannot be split across partitions,
// hence the containment asserts.
// NOTE(review): extraction gaps — the insert-point selection around
// original lines 3859-3868 (choosing between the block's first insertion
// point and OldPtr), the RAUW of the old pointer, and the
// PHIUsers/return handling at the end are missing from this view. Code
// below is kept byte-identical.
3855 bool visitPHINode(PHINode &PN) {
// A PHI use must be wholly contained in this new alloca's range.
3857 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
3858 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
// Save/restore the builder position; we temporarily move it to emit the
// replacement pointer near the old one.
3864 IRBuilderBase::InsertPointGuard Guard(IRB);
3867 OldPtr->
getParent()->getFirstInsertionPt());
3869 IRB.SetInsertPoint(OldPtr);
3870 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
// Materialize the slice pointer with the same pointer type as OldPtr.
3872 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
// The old pointer may now be dead; queue it for deletion if so.
3877 deleteIfTriviallyDead(OldPtr);
// Loads/stores reached through the PHI may need their alignment reduced.
3880 fixLoadStoreAlign(PN);
// Rewrite a select whose true and/or false operand is the old alloca
// pointer to use the new slice pointer instead. Like PHIs, selects are
// never split across partitions.
// NOTE(review): extraction gaps — original lines 3890, 3895, 3897,
// 3902-3903, 3905-3906, and the trailing SelectUsers/return handling are
// missing from this view. Code below is kept byte-identical.
3889 bool visitSelectInst(SelectInst &SI) {
// The slice's use must actually be one of the select's pointer operands.
3891 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
3892 "Pointer isn't an operand!");
// A select use must be wholly contained in this new alloca's range.
3893 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
3894 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
3896 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
// Replace whichever operand(s) referenced the old pointer; both arms may
// match when true and false values are the same pointer.
3898 if (
SI.getOperand(1) == OldPtr)
3899 SI.setOperand(1, NewPtr);
3900 if (
SI.getOperand(2) == OldPtr)
3901 SI.setOperand(2, NewPtr);
// The old pointer may now be dead; queue it for deletion if so.
3904 deleteIfTriviallyDead(OldPtr);
// Loads/stores reached through the select may need reduced alignment.
3907 fixLoadStoreAlign(SI);
3922class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
3924 friend class InstVisitor<AggLoadStoreRewriter, bool>;
3930 SmallPtrSet<User *, 8> Visited;
3937 const DataLayout &
DL;
3942 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
3943 :
DL(
DL), IRB(IRB) {}
3947 bool rewrite(Instruction &
I) {
3951 while (!
Queue.empty()) {
3952 U =
Queue.pop_back_val();
3961 void enqueueUsers(Instruction &
I) {
3962 for (Use &U :
I.uses())
3963 if (Visited.
insert(
U.getUser()).second)
3964 Queue.push_back(&U);
3968 bool visitInstruction(Instruction &
I) {
return false; }
3971 template <
typename Derived>
class OpSplitter {
3978 SmallVector<unsigned, 4> Indices;
3982 SmallVector<Value *, 4> GEPIndices;
3996 const DataLayout &
DL;
4000 OpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4001 Align BaseAlign,
const DataLayout &
DL, IRBuilderTy &IRB)
4002 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4003 BaseAlign(BaseAlign),
DL(
DL) {
4004 IRB.SetInsertPoint(InsertionPoint);
4021 void emitSplitOps(
Type *Ty,
Value *&Agg,
const Twine &Name) {
4023 unsigned Offset =
DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4024 return static_cast<Derived *
>(
this)->emitFunc(
4029 unsigned OldSize = Indices.
size();
4031 for (
unsigned Idx = 0,
Size = ATy->getNumElements(); Idx !=
Size;
4033 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4035 GEPIndices.
push_back(IRB.getInt32(Idx));
4036 emitSplitOps(ATy->getElementType(), Agg, Name +
"." + Twine(Idx));
4044 unsigned OldSize = Indices.
size();
4046 for (
unsigned Idx = 0,
Size = STy->getNumElements(); Idx !=
Size;
4048 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4050 GEPIndices.
push_back(IRB.getInt32(Idx));
4051 emitSplitOps(STy->getElementType(Idx), Agg, Name +
"." + Twine(Idx));
4062 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
4066 SmallVector<Value *, 4> Components;
4071 LoadOpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4072 AAMDNodes AATags, Align BaseAlign,
const DataLayout &
DL,
4074 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
DL,
4080 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4084 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name +
".gep");
4086 IRB.CreateAlignedLoad(Ty,
GEP, Alignment, Name +
".load");
4092 Load->setAAMetadata(
4098 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name +
".insert");
4103 void recordFakeUses(LoadInst &LI) {
4104 for (Use &U : LI.
uses())
4106 if (
II->getIntrinsicID() == Intrinsic::fake_use)
4112 void emitFakeUses() {
4113 for (Instruction *
I : FakeUses) {
4114 IRB.SetInsertPoint(
I);
4115 for (
auto *V : Components)
4116 IRB.CreateIntrinsic(Intrinsic::fake_use, {
V});
4117 I->eraseFromParent();
4122 bool visitLoadInst(LoadInst &LI) {
4131 Splitter.recordFakeUses(LI);
4134 Splitter.emitFakeUses();
4141 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
4142 StoreOpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4143 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4144 const DataLayout &
DL, IRBuilderTy &IRB)
4145 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4147 AATags(AATags), AggStore(AggStore) {}
4149 StoreInst *AggStore;
4152 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4158 Value *ExtractValue =
4159 IRB.CreateExtractValue(Agg, Indices, Name +
".extract");
4160 Value *InBoundsGEP =
4161 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name +
".gep");
4163 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4179 uint64_t SizeInBits =
4180 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
4182 SizeInBits, AggStore, Store,
4183 Store->getPointerOperand(),
Store->getValueOperand(),
4187 "AT: unexpected debug.assign linked to store through "
4194 bool visitStoreInst(StoreInst &SI) {
4195 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
4198 if (
V->getType()->isSingleValueType())
4203 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
4205 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
4210 SI.eraseFromParent();
4214 bool visitBitCastInst(BitCastInst &BC) {
4219 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4229 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4248 if (!ZI->getSrcTy()->isIntegerTy(1))
4261 dbgs() <<
" original: " << *Sel <<
"\n";
4262 dbgs() <<
" " << GEPI <<
"\n";);
4264 auto GetNewOps = [&](
Value *SelOp) {
4277 Cond =
SI->getCondition();
4278 True =
SI->getTrueValue();
4279 False =
SI->getFalseValue();
4283 Cond = Sel->getOperand(0);
4284 True = ConstantInt::get(Sel->getType(), 1);
4285 False = ConstantInt::get(Sel->getType(), 0);
4290 IRB.SetInsertPoint(&GEPI);
4294 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0],
ArrayRef(TrueOps).drop_front(),
4295 True->
getName() +
".sroa.gep", NW);
4298 IRB.CreateGEP(Ty, FalseOps[0],
ArrayRef(FalseOps).drop_front(),
4299 False->
getName() +
".sroa.gep", NW);
4301 Value *NSel = MDFrom
4302 ? IRB.CreateSelect(
Cond, NTrue, NFalse,
4303 Sel->getName() +
".sroa.sel", MDFrom)
4304 : IRB.CreateSelectWithUnknownProfile(
4306 Sel->getName() +
".sroa.sel");
4307 Visited.
erase(&GEPI);
4312 enqueueUsers(*NSelI);
4315 dbgs() <<
" " << *NFalse <<
"\n";
4316 dbgs() <<
" " << *NSel <<
"\n";);
4325 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4330 auto IsInvalidPointerOperand = [](
Value *
V) {
4334 return !AI->isStaticAlloca();
4338 if (
any_of(
Phi->operands(), IsInvalidPointerOperand))
4353 [](
Value *V) { return isa<ConstantInt>(V); }))
4366 dbgs() <<
" original: " << *
Phi <<
"\n";
4367 dbgs() <<
" " << GEPI <<
"\n";);
4369 auto GetNewOps = [&](
Value *PhiOp) {
4379 IRB.SetInsertPoint(Phi);
4380 PHINode *NewPhi = IRB.CreatePHI(GEPI.
getType(),
Phi->getNumIncomingValues(),
4381 Phi->getName() +
".sroa.phi");
4387 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
4396 IRB.CreateGEP(SourceTy, NewOps[0],
ArrayRef(NewOps).drop_front(),
4402 Visited.
erase(&GEPI);
4406 enqueueUsers(*NewPhi);
4412 dbgs() <<
"\n " << *NewPhi <<
'\n');
4417 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4418 if (unfoldGEPSelect(GEPI))
4421 if (unfoldGEPPhi(GEPI))
4428 bool visitPHINode(PHINode &PN) {
4433 bool visitSelectInst(SelectInst &SI) {
4447 if (Ty->isSingleValueType())
4450 uint64_t AllocSize =
DL.getTypeAllocSize(Ty).getFixedValue();
4455 InnerTy = ArrTy->getElementType();
4459 InnerTy = STy->getElementType(Index);
4464 if (AllocSize >
DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4465 TypeSize >
DL.getTypeSizeInBits(InnerTy).getFixedValue())
4486 if (
Offset == 0 &&
DL.getTypeAllocSize(Ty).getFixedValue() ==
Size)
4488 if (
Offset >
DL.getTypeAllocSize(Ty).getFixedValue() ||
4489 (
DL.getTypeAllocSize(Ty).getFixedValue() -
Offset) <
Size)
4496 ElementTy = AT->getElementType();
4497 TyNumElements = AT->getNumElements();
4502 ElementTy = VT->getElementType();
4503 TyNumElements = VT->getNumElements();
4505 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4507 if (NumSkippedElements >= TyNumElements)
4509 Offset -= NumSkippedElements * ElementSize;
4521 if (
Size == ElementSize)
4525 if (NumElements * ElementSize !=
Size)
4549 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4550 if (
Offset >= ElementSize)
4561 if (
Size == ElementSize)
4568 if (Index == EndIndex)
4578 assert(Index < EndIndex);
4617bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4631 struct SplitOffsets {
4633 std::vector<uint64_t> Splits;
4635 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4648 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4650 LLVM_DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
4651 for (
auto &
P : AS.partitions()) {
4652 for (Slice &S :
P) {
4654 if (!S.isSplittable() || S.endOffset() <=
P.endOffset()) {
4659 UnsplittableLoads.
insert(LI);
4662 UnsplittableLoads.
insert(LI);
4665 assert(
P.endOffset() > S.beginOffset() &&
4666 "Empty or backwards partition!");
4675 auto IsLoadSimplyStored = [](LoadInst *LI) {
4676 for (User *LU : LI->
users()) {
4678 if (!SI || !
SI->isSimple())
4683 if (!IsLoadSimplyStored(LI)) {
4684 UnsplittableLoads.
insert(LI);
4690 if (S.getUse() != &
SI->getOperandUse(
SI->getPointerOperandIndex()))
4694 if (!StoredLoad || !StoredLoad->isSimple())
4696 assert(!
SI->isVolatile() &&
"Cannot split volatile stores!");
4706 auto &
Offsets = SplitOffsetsMap[
I];
4708 "Should not have splits the first time we see an instruction!");
4710 Offsets.Splits.push_back(
P.endOffset() - S.beginOffset());
4715 for (Slice *S :
P.splitSliceTails()) {
4716 auto SplitOffsetsMapI =
4718 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
4720 auto &
Offsets = SplitOffsetsMapI->second;
4724 "Cannot have an empty set of splits on the second partition!");
4726 P.beginOffset() -
Offsets.S->beginOffset() &&
4727 "Previous split does not end where this one begins!");
4731 if (S->endOffset() >
P.endOffset())
4740 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4746 if (UnsplittableLoads.
count(LI))
4749 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
4750 if (LoadOffsetsI == SplitOffsetsMap.
end())
4752 auto &LoadOffsets = LoadOffsetsI->second;
4755 auto &StoreOffsets = SplitOffsetsMap[
SI];
4760 if (LoadOffsets.Splits == StoreOffsets.Splits)
4764 <<
" " << *LI <<
"\n"
4765 <<
" " << *SI <<
"\n");
4771 UnsplittableLoads.
insert(LI);
4780 return UnsplittableLoads.
count(LI);
4785 return UnsplittableLoads.
count(LI);
4795 IRBuilderTy IRB(&AI);
4802 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4812 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4813 std::vector<LoadInst *> SplitLoads;
4814 const DataLayout &
DL = AI.getDataLayout();
4815 for (LoadInst *LI : Loads) {
4818 auto &
Offsets = SplitOffsetsMap[LI];
4819 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
4821 "Load must have type size equal to store size");
4823 "Load must be >= slice size");
4825 uint64_t BaseOffset =
Offsets.S->beginOffset();
4826 assert(BaseOffset + SliceSize > BaseOffset &&
4827 "Cannot represent alloca access size using 64-bit integers!");
4830 IRB.SetInsertPoint(LI);
4834 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
4837 auto *PartTy = Type::getIntNTy(LI->
getContext(), PartSize * 8);
4840 LoadInst *PLoad = IRB.CreateAlignedLoad(
4843 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4844 PartPtrTy,
BasePtr->getName() +
"."),
4847 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4848 LLVMContext::MD_access_group});
4852 SplitLoads.push_back(PLoad);
4856 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4860 <<
", " << NewSlices.
back().endOffset()
4861 <<
"): " << *PLoad <<
"\n");
4868 PartOffset =
Offsets.Splits[Idx];
4870 PartSize = (Idx <
Size ?
Offsets.Splits[Idx] : SliceSize) - PartOffset;
4876 bool DeferredStores =
false;
4877 for (User *LU : LI->
users()) {
4879 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
4880 DeferredStores =
true;
4886 Value *StoreBasePtr =
SI->getPointerOperand();
4887 IRB.SetInsertPoint(SI);
4888 AAMDNodes AATags =
SI->getAAMetadata();
4890 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
4892 for (
int Idx = 0,
Size = SplitLoads.size(); Idx <
Size; ++Idx) {
4893 LoadInst *PLoad = SplitLoads[Idx];
4894 uint64_t PartOffset = Idx == 0 ? 0 :
Offsets.Splits[Idx - 1];
4895 auto *PartPtrTy =
SI->getPointerOperandType();
4897 auto AS =
SI->getPointerAddressSpace();
4898 StoreInst *PStore = IRB.CreateAlignedStore(
4901 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4902 PartPtrTy, StoreBasePtr->
getName() +
"."),
4905 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4906 LLVMContext::MD_access_group,
4907 LLVMContext::MD_DIAssignID});
4912 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
4920 ResplitPromotableAllocas.
insert(OtherAI);
4921 Worklist.insert(OtherAI);
4924 Worklist.insert(OtherAI);
4928 DeadInsts.push_back(SI);
4933 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
4936 DeadInsts.push_back(LI);
4945 for (StoreInst *SI : Stores) {
4950 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
4954 "Slice size should always match load size exactly!");
4955 uint64_t BaseOffset =
Offsets.S->beginOffset();
4956 assert(BaseOffset + StoreSize > BaseOffset &&
4957 "Cannot represent alloca access size using 64-bit integers!");
4965 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
4966 std::vector<LoadInst *> *SplitLoads =
nullptr;
4967 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
4968 SplitLoads = &SplitLoadsMapI->second;
4970 "Too few split loads for the number of splits in the store!");
4975 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
4978 auto *PartTy = Type::getIntNTy(Ty->
getContext(), PartSize * 8);
4980 auto *StorePartPtrTy =
SI->getPointerOperandType();
4985 PLoad = (*SplitLoads)[Idx];
4987 IRB.SetInsertPoint(LI);
4989 PLoad = IRB.CreateAlignedLoad(
4992 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4993 LoadPartPtrTy, LoadBasePtr->
getName() +
"."),
4996 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4997 LLVMContext::MD_access_group});
5001 IRB.SetInsertPoint(SI);
5002 auto AS =
SI->getPointerAddressSpace();
5003 StoreInst *PStore = IRB.CreateAlignedStore(
5006 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
5007 StorePartPtrTy, StoreBasePtr->
getName() +
"."),
5010 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5011 LLVMContext::MD_access_group});
5015 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5019 <<
", " << NewSlices.
back().endOffset()
5020 <<
"): " << *PStore <<
"\n");
5030 PartOffset =
Offsets.Splits[Idx];
5032 PartSize = (Idx <
Size ?
Offsets.Splits[Idx] : StoreSize) - PartOffset;
5042 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
5043 ResplitPromotableAllocas.
insert(OtherAI);
5044 Worklist.insert(OtherAI);
5047 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
5048 Worklist.insert(OtherAI);
5063 DeadInsts.push_back(LI);
5065 DeadInsts.push_back(SI);
5074 AS.insert(NewSlices);
5078 for (
auto I = AS.begin(),
E = AS.end();
I !=
E; ++
I)
5084 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5100static std::tuple<Type *, bool, VectorType *>
5118 if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5119 VecTy->getElementCount().getFixedValue() > 1)
5120 return {VecTy,
false, VecTy};
5124 auto [CommonUseTy, LargestIntTy] =
5127 TypeSize CommonUseSize =
DL.getTypeAllocSize(CommonUseTy);
5133 return {VecTy,
false, VecTy};
5142 P.beginOffset(),
P.size())) {
5146 if (TypePartitionTy->isArrayTy() &&
5147 TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5148 DL.isLegalInteger(
P.size() * 8))
5152 return {TypePartitionTy,
true,
nullptr};
5154 return {VecTy,
false, VecTy};
5158 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >=
P.size() &&
5160 return {LargestIntTy,
true,
nullptr};
5163 return {TypePartitionTy,
false,
nullptr};
5168 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >=
P.size())
5169 return {LargestIntTy,
false,
nullptr};
5172 if (
DL.isLegalInteger(
P.size() * 8))
5189std::pair<AllocaInst *, uint64_t>
5190SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &
P) {
5191 const DataLayout &
DL = AI.getDataLayout();
5193 auto [PartitionTy, IsIntegerWideningViable, VecTy] =
5203 if (PartitionTy == AI.getAllocatedType() &&
P.beginOffset() == 0) {
5213 const bool IsUnconstrained = Alignment <=
DL.getABITypeAlign(PartitionTy);
5214 NewAI =
new AllocaInst(
5215 PartitionTy, AI.getAddressSpace(),
nullptr,
5216 IsUnconstrained ?
DL.getPrefTypeAlign(PartitionTy) : Alignment,
5217 AI.
getName() +
".sroa." + Twine(
P.begin() - AS.begin()),
5224 LLVM_DEBUG(
dbgs() <<
"Rewriting alloca partition " <<
"[" <<
P.beginOffset()
5225 <<
"," <<
P.endOffset() <<
") to: " << *NewAI <<
"\n");
5230 unsigned PPWOldSize = PostPromotionWorklist.size();
5231 unsigned NumUses = 0;
5232 SmallSetVector<PHINode *, 8> PHIUsers;
5233 SmallSetVector<SelectInst *, 8> SelectUsers;
5236 DL, AS, *
this, AI, *NewAI, PartitionTy,
P.beginOffset(),
P.endOffset(),
5237 IsIntegerWideningViable, VecTy, PHIUsers, SelectUsers);
5238 bool Promotable =
true;
5240 if (
auto DeletedValues =
Rewriter.rewriteTreeStructuredMerge(
P)) {
5241 NumUses += DeletedValues->
size() + 1;
5242 for (
Value *V : *DeletedValues)
5243 DeadInsts.push_back(V);
5245 for (Slice *S :
P.splitSliceTails()) {
5249 for (Slice &S :
P) {
5255 NumAllocaPartitionUses += NumUses;
5256 MaxUsesPerAllocaPartition.updateMax(NumUses);
5260 for (PHINode *
PHI : PHIUsers)
5264 SelectUsers.
clear();
5269 NewSelectsToRewrite;
5271 for (SelectInst *Sel : SelectUsers) {
5272 std::optional<RewriteableMemOps>
Ops =
5277 SelectUsers.clear();
5278 NewSelectsToRewrite.
clear();
5285 for (Use *U : AS.getDeadUsesIfPromotable()) {
5287 Value::dropDroppableUse(*U);
5290 DeadInsts.push_back(OldInst);
5292 if (PHIUsers.empty() && SelectUsers.empty()) {
5294 PromotableAllocas.insert(NewAI);
5299 SpeculatablePHIs.insert_range(PHIUsers);
5300 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5301 NewSelectsToRewrite.
size());
5303 std::make_move_iterator(NewSelectsToRewrite.
begin()),
5304 std::make_move_iterator(NewSelectsToRewrite.
end())))
5305 SelectsToRewrite.insert(std::move(KV));
5306 Worklist.insert(NewAI);
5310 while (PostPromotionWorklist.size() > PPWOldSize)
5311 PostPromotionWorklist.pop_back();
5316 return {
nullptr, 0};
5321 Worklist.insert(NewAI);
5324 return {NewAI,
DL.getTypeSizeInBits(PartitionTy).getFixedValue()};
5368 int64_t BitExtractOffset) {
5370 bool HasFragment =
false;
5371 bool HasBitExtract =
false;
5380 HasBitExtract =
true;
5381 int64_t ExtractOffsetInBits =
Op.getArg(0);
5382 int64_t ExtractSizeInBits =
Op.getArg(1);
5391 assert(BitExtractOffset <= 0);
5392 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5398 if (AdjustedOffset < 0)
5401 Ops.push_back(
Op.getOp());
5402 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5403 Ops.push_back(ExtractSizeInBits);
5406 Op.appendToVector(
Ops);
5411 if (HasFragment && HasBitExtract)
5414 if (!HasBitExtract) {
5433 std::optional<DIExpression::FragmentInfo> NewFragment,
5434 int64_t BitExtractAdjustment) {
5444 BitExtractAdjustment);
5445 if (!NewFragmentExpr)
5451 BeforeInst->
getParent()->insertDbgRecordBefore(DVR,
5464 BeforeInst->
getParent()->insertDbgRecordBefore(DVR,
5470 if (!NewAddr->
hasMetadata(LLVMContext::MD_DIAssignID)) {
5478 LLVM_DEBUG(
dbgs() <<
"Created new DVRAssign: " << *NewAssign <<
"\n");
5484bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5485 if (AS.begin() == AS.end())
5488 unsigned NumPartitions = 0;
5490 const DataLayout &
DL = AI.getModule()->getDataLayout();
5493 Changed |= presplitLoadsAndStores(AI, AS);
5501 bool IsSorted =
true;
5503 uint64_t AllocaSize = AI.getAllocationSize(
DL)->getFixedValue();
5504 const uint64_t MaxBitVectorSize = 1024;
5505 if (AllocaSize <= MaxBitVectorSize) {
5508 SmallBitVector SplittableOffset(AllocaSize + 1,
true);
5510 for (
unsigned O = S.beginOffset() + 1;
5511 O < S.endOffset() && O < AllocaSize; O++)
5512 SplittableOffset.reset(O);
5514 for (Slice &S : AS) {
5515 if (!S.isSplittable())
5518 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5519 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5524 S.makeUnsplittable();
5531 for (Slice &S : AS) {
5532 if (!S.isSplittable())
5535 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5540 S.makeUnsplittable();
5555 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5561 for (
auto &
P : AS.partitions()) {
5562 auto [NewAI, ActiveBits] = rewritePartition(AI, AS, P);
5566 uint64_t SizeOfByte = 8;
5568 uint64_t Size = std::min(ActiveBits, P.size() * SizeOfByte);
5569 Fragments.push_back(
5570 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5576 NumAllocaPartitions += NumPartitions;
5577 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5581 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5586 const Value *DbgPtr = DbgVariable->getAddress();
5588 DbgVariable->getFragmentOrEntireVariable();
5591 int64_t CurrentExprOffsetInBytes = 0;
5592 SmallVector<uint64_t> PostOffsetOps;
5594 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5598 int64_t ExtractOffsetInBits = 0;
5602 ExtractOffsetInBits =
Op.getArg(0);
5607 DIBuilder DIB(*AI.getModule(),
false);
5608 for (
auto Fragment : Fragments) {
5609 int64_t OffsetFromLocationInBits;
5610 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5615 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5616 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5617 NewDbgFragment, OffsetFromLocationInBits))
5623 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5628 if (!NewDbgFragment)
5629 NewDbgFragment = DbgVariable->getFragment();
5633 int64_t OffestFromNewAllocaInBits =
5634 OffsetFromLocationInBits - ExtractOffsetInBits;
5637 int64_t BitExtractOffset =
5638 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5643 OffestFromNewAllocaInBits =
5644 std::max(int64_t(0), OffestFromNewAllocaInBits);
5650 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5651 if (OffestFromNewAllocaInBits > 0) {
5652 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5658 auto RemoveOne = [DbgVariable](
auto *OldDII) {
5659 auto SameVariableFragment = [](
const auto *
LHS,
const auto *
RHS) {
5660 return LHS->getVariable() ==
RHS->getVariable() &&
5661 LHS->getDebugLoc()->getInlinedAt() ==
5662 RHS->getDebugLoc()->getInlinedAt();
5664 if (SameVariableFragment(OldDII, DbgVariable))
5665 OldDII->eraseFromParent();
5670 NewDbgFragment, BitExtractOffset);
5684void SROA::clobberUse(Use &U) {
5694 DeadInsts.push_back(OldI);
5716bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5721 LLVM_DEBUG(
dbgs() <<
"Attempting to propagate values on " << AI <<
"\n");
5722 bool AllSameAndValid =
true;
5723 Type *PartitionType =
nullptr;
5725 uint64_t BeginOffset = 0;
5726 uint64_t EndOffset = 0;
5728 auto Flush = [&]() {
5729 if (AllSameAndValid && !Insts.
empty()) {
5730 LLVM_DEBUG(
dbgs() <<
"Propagate values on slice [" << BeginOffset <<
", "
5731 << EndOffset <<
")\n");
5733 SSAUpdater
SSA(&NewPHIs);
5735 BasicLoadAndStorePromoter Promoter(Insts,
SSA, PartitionType);
5736 Promoter.run(Insts);
5738 AllSameAndValid =
true;
5739 PartitionType =
nullptr;
5743 for (Slice &S : AS) {
5747 dbgs() <<
"Ignoring slice: ";
5748 AS.print(
dbgs(), &S);
5752 if (S.beginOffset() >= EndOffset) {
5754 BeginOffset = S.beginOffset();
5755 EndOffset = S.endOffset();
5756 }
else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5757 if (AllSameAndValid) {
5759 dbgs() <<
"Slice does not match range [" << BeginOffset <<
", "
5760 << EndOffset <<
")";
5761 AS.print(
dbgs(), &S);
5763 AllSameAndValid =
false;
5765 EndOffset = std::max(EndOffset, S.endOffset());
5772 if (!LI->
isSimple() || (PartitionType && UserTy != PartitionType))
5773 AllSameAndValid =
false;
5774 PartitionType = UserTy;
5777 Type *UserTy =
SI->getValueOperand()->getType();
5778 if (!
SI->isSimple() || (PartitionType && UserTy != PartitionType))
5779 AllSameAndValid =
false;
5780 PartitionType = UserTy;
5783 AllSameAndValid =
false;
5796std::pair<
bool ,
bool >
5797SROA::runOnAlloca(AllocaInst &AI) {
5799 bool CFGChanged =
false;
5802 ++NumAllocasAnalyzed;
5805 if (AI.use_empty()) {
5806 AI.eraseFromParent();
5810 const DataLayout &
DL = AI.getDataLayout();
5813 std::optional<TypeSize>
Size = AI.getAllocationSize(
DL);
5814 if (AI.isArrayAllocation() || !
Size ||
Size->isScalable() ||
Size->isZero())
5819 IRBuilderTy IRB(&AI);
5820 AggLoadStoreRewriter AggRewriter(
DL, IRB);
5821 Changed |= AggRewriter.rewrite(AI);
5824 AllocaSlices AS(
DL, AI);
5829 if (AS.isEscapedReadOnly()) {
5830 Changed |= propagateStoredValuesToLoads(AI, AS);
5835 for (Instruction *DeadUser : AS.getDeadUsers()) {
5837 for (Use &DeadOp : DeadUser->operands())
5844 DeadInsts.push_back(DeadUser);
5847 for (Use *DeadOp : AS.getDeadOperands()) {
5848 clobberUse(*DeadOp);
5853 if (AS.begin() == AS.end())
5856 Changed |= splitAlloca(AI, AS);
5859 while (!SpeculatablePHIs.empty())
5863 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5864 while (!RemainingSelectsToRewrite.empty()) {
5865 const auto [
K,
V] = RemainingSelectsToRewrite.pop_back_val();
5882bool SROA::deleteDeadInstructions(
5883 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5885 while (!DeadInsts.empty()) {
5895 DeletedAllocas.
insert(AI);
5897 OldDII->eraseFromParent();
5903 for (Use &Operand :
I->operands())
5908 DeadInsts.push_back(U);
5912 I->eraseFromParent();
5922bool SROA::promoteAllocas() {
5923 if (PromotableAllocas.empty())
5930 NumPromoted += PromotableAllocas.size();
5931 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
5934 PromotableAllocas.clear();
5938std::pair<
bool ,
bool > SROA::runSROA(Function &
F) {
5941 const DataLayout &
DL =
F.getDataLayout();
5946 std::optional<TypeSize>
Size = AI->getAllocationSize(
DL);
5948 PromotableAllocas.insert(AI);
5950 Worklist.insert(AI);
5955 bool CFGChanged =
false;
5958 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
5961 while (!Worklist.empty()) {
5962 auto [IterationChanged, IterationCFGChanged] =
5963 runOnAlloca(*Worklist.pop_back_val());
5965 CFGChanged |= IterationCFGChanged;
5967 Changed |= deleteDeadInstructions(DeletedAllocas);
5971 if (!DeletedAllocas.
empty()) {
5972 Worklist.set_subtract(DeletedAllocas);
5973 PostPromotionWorklist.set_subtract(DeletedAllocas);
5974 PromotableAllocas.set_subtract(DeletedAllocas);
5975 DeletedAllocas.
clear();
5981 Worklist = PostPromotionWorklist;
5982 PostPromotionWorklist.clear();
5983 }
while (!Worklist.empty());
5985 assert((!CFGChanged ||
Changed) &&
"Can not only modify the CFG.");
5987 "Should not have modified the CFG when told to preserve it.");
5990 for (
auto &BB :
F) {
6003 SROA(&
F.getContext(), &DTU, &AC, PreserveCFG).runSROA(
F);
6016 OS, MapClassName2PassName);
6038 if (skipFunction(
F))
6041 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6043 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
6050 void getAnalysisUsage(AnalysisUsage &AU)
const override {
6057 StringRef getPassName()
const override {
return "SROA"; }
6062char SROALegacyPass::ID = 0;
6070 "Scalar Replacement Of Aggregates",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
print mir2vec MIR2Vec Vocabulary Printer Pass
This file implements a map that provides insertion order iteration.
static std::optional< AllocFnsTy > getAllocationSize(const CallBase *CB, const TargetLibraryInfo *TLI)
uint64_t IntrinsicInst * II
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL)
Find linked dbg.assign and generate a new one with the correct FragmentInfo.
static std::tuple< Type *, bool, VectorType * > selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI, LLVMContext &C)
Select a partition type for an alloca partition.
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL, unsigned VScale)
Test whether the given alloca partitioning and range of slices can be promoted to a vector.
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset)
Compute the adjusted alignment for a load or store from an offset.
static VectorType * checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy, unsigned VScale)
Test whether any vector type in CandidateTys is viable for promotion.
static std::pair< Type *, IntegerType * > findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices.
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional< DIExpression::FragmentInfo > StorageFragment, std::optional< DIExpression::FragmentInfo > CurrentFragment, DIExpression::FragmentInfo &Target)
static DIExpression * createOrReplaceFragment(const DIExpression *Expr, DIExpression::FragmentInfo Frag, int64_t BitExtractOffset)
Create or replace an existing fragment in a DIExpression with Frag.
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL, unsigned VScale)
Test whether the given slice use can be promoted to a vector.
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
static Value * foldSelectInst(SelectInst &SI)
bool isKillAddress(const DbgVariableRecord *DVR)
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale)
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR)
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional< DIExpression::FragmentInfo > NewFragment, int64_t BitExtractAdjustment)
Insert a new DbgRecord.
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
static Value * mergeTwoVectors(Value *V0, Value *V1, const DataLayout &DL, Type *NewAIEltTy, IRBuilder<> &Builder)
This function takes two vector values and combines them into a single vector by concatenating their e...
const DIExpression * getAddressExpression(const DbgVariableRecord *DVR)
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, unsigned VScale=0)
Test whether we can convert a value from the old to the new type.
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Builder for the alloca slices.
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
An iterator over partitions of the alloca's slices.
bool operator==(const partition_iterator &RHS) const
friend class AllocaSlices
partition_iterator & operator++()
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Represents analyses that only rely on functions' control flow.
LLVM_ABI CaptureInfo getCaptureInfo(unsigned OpNo) const
Return which pointer components this operand may capture.
bool onlyReadsMemory(unsigned OpNo) const
bool isDataOperand(const Use *U) const
This is the shared class of boolean and integer constants.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static DIAssignID * getDistinct(LLVMContext &Context)
LLVM_ABI DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
LLVM_ABI bool startsWithDeref() const
Return whether the first element a DW_OP_deref.
static LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value or/and an ...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI void moveBefore(DbgRecord *MoveBefore)
DebugLoc getDebugLoc() const
void setDebugLoc(DebugLoc Loc)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI void setKillAddress()
Kill the address component.
LLVM_ABI bool isKillLocation() const
LocationType getType() const
LLVM_ABI bool isKillAddress() const
Check whether this kills the address component.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Value * getValue(unsigned OpIdx=0) const
static LLVM_ABI DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
LLVM_ABI void setAssignId(DIAssignID *New)
DIExpression * getExpression() const
static LLVM_ABI DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static LLVM_ABI DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DILocalVariable * getVariable() const
LLVM_ABI void setKillLocation()
bool isDbgDeclare() const
void setAddress(Value *V)
DIExpression * getAddressExpression() const
LLVM_ABI DILocation * getInlinedAt() const
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
unsigned getVScaleValue() const
Return the value for vscale based on the vscale_range attribute or 0 when unknown.
const BasicBlock & getEntryBlock() const
LLVM_ABI bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Value * getPointerOperand()
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
LLVM_ABI GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVMContext & getContext() const
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
This is the common base class for memset/memcpy/memmove.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
PtrUseVisitor(const DataLayout &DL)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
SROAPass(SROAOptions PreserveCFG)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Helper class for SSA formation on a set of values defined in multiple blocks.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Value * getValueOperand()
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Represent a constant reference to a string, i.e.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getSizeInBytes() const
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
TypeSize getSizeInBits() const
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
element_iterator element_end() const
element_iterator element_begin() const
Type * getElementType(unsigned N) const
Type::subtype_iterator element_iterator
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
bool isTargetExtTy() const
Return true if this is a target extension type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
iterator_range< user_iterator > users()
LLVM_ABI void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy)
This static method attempts to construct a VectorType with the same size-in-bits as SizeTy but with a...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool operator<(int64_t V1, const APSInt &V2)
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
cl::opt< bool > ProfcheckDisableMetadataFixes
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
bool operator!=(uint64_t V1, const APInt &V2)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
auto dyn_cast_or_null(const Y &Val)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool capturesFullProvenance(CaptureComponents CC)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
As above, for DVRValues.
LLVM_ABI void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createSROAPass(bool PreserveCFG=true)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Describes an element of a Bitfield.
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
A CRTP mix-in to automatically provide informational APIs needed for passes.