#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
114 "Number of loads rewritten into predicated loads to allow promotion");
117 "Number of stores rewritten into predicated loads to allow promotion");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
class AllocaSliceRewriter;

class SelectHandSpeculativity {
  unsigned char Storage = 0;

  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}

static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
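// Note: SelectHandSpeculativity packs two flags, one per select hand, into a
// single byte; setAsSpeculatable(isTrueVal) records that the corresponding
// hand can be speculated, and the intptr_t conversions keep it small enough
// to be carried as the integer half of a PointerIntPair next to the load it
// describes (see PossiblySpeculatableLoad below). The static_assert pins the
// type to one byte so that packing stays cheap.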
using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
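// A RewriteableMemOp is either a load whose per-hand speculatability has
// already been computed (PossiblySpeculatableLoad) or a store that can never
// be speculated (UnspeculatableStore); isSafeSelectToSpeculate below returns
// one entry per user of the select that it knows how to rewrite.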
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  SmallSetVector<AllocaInst *, 16> Worklist;

  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;

  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>

  SmallSetVector<PHINode *, 8> SpeculatablePHIs;

  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  std::pair<bool, bool> runSROA(Function &F);

  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool, bool> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
enum FragCalcResult { UseFrag, UseNoFrag, Skip };

    uint64_t NewStorageSliceOffsetInBits,
    std::optional<DIExpression::FragmentInfo> StorageFragment,
    std::optional<DIExpression::FragmentInfo> CurrentFragment,

  if (StorageFragment) {
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;

  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {
      if (Target == CurrentFragment)

  if (!CurrentFragment || *CurrentFragment == Target)

  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())

  if (DVRAssignMarkerRange.empty())

  LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
  LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");

        DVR->getExpression()->getFragmentInfo();

    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

      std::optional<DIExpression::FragmentInfo> BaseFragment;
      if (R == BaseFragments.end())
      BaseFragment = R->second;
      std::optional<DIExpression::FragmentInfo> CurrentFragment =
          Expr->getFragmentInfo();
          DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
          BaseFragment, CurrentFragment, NewFragment);

      if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
        if (CurrentFragment) {
          NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;

    SetKillLocation = true;

      Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);

                                 DbgAssign->getDebugLoc())));
      NewAssign = DbgAssign;

        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());

    if (NewAssign != DbgAssign) {
      NewAssign->moveBefore(DbgAssign->getIterator());

    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");

  for_each(DVRAssignMarkerRange, MigrateDbgAssign);

  Twine getNameWithPrefix(const Twine &Name) const {

  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,
  uint64_t BeginOffset = 0;

  uint64_t EndOffset = 0;

  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

    if (beginOffset() < RHS.beginOffset())
    if (beginOffset() > RHS.beginOffset())
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())

  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();

    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
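  // Slices are ordered primarily by begin offset; at equal begin offsets,
  // unsplittable slices sort before splittable ones and longer slices sort
  // first, giving the strict weak ordering the partition walk below relies on.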
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  bool isEscaped() const { return PointerEscapingInstr; }
  bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

  using range = iterator_range<iterator>;

  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_range = iterator_range<const_iterator>;

  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }

  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());
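    // The freshly appended slices are sorted on their own and then merged
    // into the already-sorted prefix, so Slices stays globally sorted without
    // re-sorting the elements that were already in order.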
    return DeadUseIfPromotable;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = " ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = " ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = " ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;

  template <typename DerivedT, typename RetT = void> class BuilderBase;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  SmallVector<Instruction *, 8> DeadUsers;

  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;

  uint64_t BeginOffset = 0, EndOffset = 0;

  Partition(iterator SI) : SI(SI), SJ(SI) {}

  uint64_t beginOffset() const { return BeginOffset; }

  uint64_t endOffset() const { return EndOffset; }

  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;

  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }

  AllocaSlices::iterator SE;

  uint64_t MaxSplitSliceEndOffset = 0;

  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)

    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;

            [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
                     return S->endOffset() == MaxSplitSliceEndOffset;
               "Could not find the current max split slice offset!");
                     return S->endOffset() <= MaxSplitSliceEndOffset;
               "Max split slice end offset is not actually the max!");

      assert(P.SplitTails.empty() && "Failed to clear the split slices!");

        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);

      P.BeginOffset = P.EndOffset;
      P.EndOffset = MaxSplitSliceEndOffset;

    if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
        !P.SI->isSplittable()) {
      P.BeginOffset = P.EndOffset;
      P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {
      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      assert(!P.SJ->isSplittable());
      P.EndOffset = P.SJ->beginOffset();

           "End iterators don't match between compared partition iterators!");

    if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
      assert(P.SJ == RHS.P.SJ &&
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "

  return make_range(partition_iterator(begin(), end()),
                    partition_iterator(end(), end()));
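// Illustrative sketch of the iteration above (byte offsets assumed): with an
// unsplittable slice [0,4) and a splittable slice [0,8), the first partition
// is [0,4) containing both uses, and the second is [4,8), fed only by the
// split tail carried over from the larger slice.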
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  return PN->hasConstantValue();

        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),

    if (VisitedDeadInsts.insert(&I).second)

                 bool IsSplittable = false) {
                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      return markAsDead(I);

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                        << Offset << " to remain within the " << AllocSize
                        << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      EndOffset = AllocSize;

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));

  void visitBitCastInst(BitCastInst &BC) {
      return markAsDead(BC);
    return Base::visitBitCastInst(BC);

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
      return markAsDead(ASC);
    return Base::visitAddrSpaceCastInst(ASC);

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
      return markAsDead(GEPI);
    return Base::visitGetElementPtrInst(GEPI);

  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {

  void visitLoadInst(LoadInst &LI) {
           "All simple FCA loads should have been pre-split");
      return PI.setEscapedReadOnly(&LI);

    if (Size.isScalable()) {
        return PI.setAborted(&LI);

  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
      return PI.setEscapedAndAborted(&SI);
      return PI.setAborted(&SI);

    TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
      unsigned VScale = SI.getFunction()->getVScaleValue();
        return PI.setAborted(&SI);

                        << Offset << " which extends past the end of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << SI << "\n");
      return markAsDead(SI);

           "All simple FCA stores should have been pre-split");

  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");
        (IsOffsetKnown && Offset.uge(AllocSize)))
      return markAsDead(II);
      return PI.setAborted(&II);
                  : AllocSize - Offset.getLimitedValue(),

  void visitMemTransferInst(MemTransferInst &II) {
      return markAsDead(II);

    if (VisitedDeadInsts.count(&II))
      return PI.setAborted(&II);

    if (Offset.uge(AllocSize)) {
      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

    if (*U == II.getRawDest() && *U == II.getRawSource()) {
      if (!II.isVolatile())
        return markAsDead(II);

    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;

      Slice &PrevP = AS.Slices[PrevIdx];

      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
          return markAsDead(II);

      PrevP.makeUnsplittable();

    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");
  void visitIntrinsicInst(IntrinsicInst &II) {
    if (II.isDroppable()) {
      AS.DeadUseIfPromotable.push_back(U);
      return PI.setAborted(&II);

    if (II.isLifetimeStartOrEnd()) {
      insertUse(II, Offset, AllocSize, true);

    Base::visitIntrinsicInst(II);

  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    SmallPtrSet<Instruction *, 4> Visited;

      std::tie(UsedI, I) = Uses.pop_back_val();

        TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());

        TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());

        if (!GEP->hasAllZeroIndices())

      for (User *U : I->users())
    } while (!Uses.empty());

  void visitPHINodeOrSelectInst(Instruction &I) {
      return markAsDead(I);

        I.getParent()->getFirstInsertionPt() == I.getParent()->end())
      return PI.setAborted(&I);

        AS.DeadOperands.push_back(U);

      return PI.setAborted(&I);

    uint64_t &Size = PHIOrSelectSizes[&I];
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);

    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

  void visitCallBase(CallBase &CB) {
      PI.setEscapedReadOnly(&CB);
    Base::visitCallBase(CB);

AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");

  PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  printUse(OS, I, Indent);

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << " A pointer to this alloca escaped by:\n"
       << " " << *PointerEscapingInstr << "\n";

  if (PointerEscapingInstrReadOnly)
    OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

  OS << "Slices of alloca: " << AI << "\n";

static std::pair<Type *, IntegerType *>

  bool TyIsCommon = true;

  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
    Use *U = I->getUse();
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

    Type *UserTy = nullptr;
      UserTy = SI->getValueOperand()->getType();

      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())

    if (!UserTy || (Ty && Ty != UserTy))

  return {TyIsCommon ? Ty : nullptr, ITy};
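// If every user of the range loads or stores the same type, that type is the
// "common" type; otherwise only the widest viable integer type seen (if any)
// is returned, which callers can still use for integer widening.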
  Type *LoadType = nullptr;

      if (LoadType != LI->getType())

    if (BBI->mayWriteToMemory())

    MaxAlign = std::max(MaxAlign, LI->getAlign());

      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

  IRB.SetInsertPoint(&PN);
                                        PN.getName() + ".sroa.speculated");

    IRB.SetInsertPoint(TI);

    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
      Load->setAAMetadata(AATags);
    InjectedLoads[Pred] = Load;

SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(true) && isSpeculatable(false);

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(true) || isSpeculatable(false);

bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();

static SelectHandSpeculativity
  SelectHandSpeculativity Spec;

    Spec.setAsSpeculatable(Value == SI.getTrueValue());

std::optional<RewriteableMemOps>
SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
  RewriteableMemOps Ops;

  for (User *U : SI.users()) {
      Ops.emplace_back(Store);

    PossiblySpeculatableLoad Load(LI);
      Ops.emplace_back(Load);

    SelectHandSpeculativity Spec =
    Ops.emplace_back(Load);

  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

    IRB.SetInsertPoint(&LI);
                              LI.getName() + ".sroa.speculate.load.true");
                              LI.getName() + ".sroa.speculate.load.false");
    NumLoadsSpeculated += 2;

    Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                                LI.getName() + ".sroa.speculated",

template <typename T>
                                 SelectHandSpeculativity Spec,

  if (Spec.areNoneSpeculatable())
        SI.getMetadata(LLVMContext::MD_prof), &DTU);
        SI.getMetadata(LLVMContext::MD_prof), &DTU,
    if (Spec.isSpeculatable(true))

    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    auto &CondMemOp = cast<T>(*I.clone());
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
        ++NumLoadsPredicated;
        ++NumStoresPredicated;
      CondMemOp.dropUBImplyingAttrsAndMetadata();
      ++NumLoadsSpeculated;

    CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
    Value *Ptr = SI.getOperand(1 + SuccIdx);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");

    I.replaceAllUsesWith(PN);
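    // Roughly: when a hand of the select cannot be speculated, the memory op
    // is rewritten into a small if/then/else diamond - the select's condition
    // picks the block, each block receives a clone of the op addressing one
    // hand, and (for loads) a PHI merges the results back, which is what the
    // replaceAllUsesWith call above wires up.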
                                  SelectHandSpeculativity Spec,

                                       const RewriteableMemOps &Ops,
  bool CFGChanged = false;

  for (const RewriteableMemOp &Op : Ops) {
    SelectHandSpeculativity Spec;

    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();

    if (Spec.areAllSpeculatable()) {
      assert(DTU && "Should not get here when not allowed to modify the CFG!");

    I->eraseFromParent();

  SI.eraseFromParent();

                             const Twine &NamePrefix) {
    Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
                                   NamePrefix + "sroa_idx");
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
                                                 NamePrefix + "sroa_cast");

                            unsigned VScale = 0) {
         "We can't have the same bitwidth for different int types");

  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
  if (NewSize != OldSize)

      return OldAS == NewAS ||
             (!DL.isNonIntegralAddressSpace(OldAS) &&
              !DL.isNonIntegralAddressSpace(NewAS) &&
              DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

      return !DL.isNonIntegralPointerType(NewTy);

    if (!DL.isNonIntegralPointerType(OldTy))

  Type *OldTy = V->getType();
         "Value not convertable to type");
         "Integer types must be the exact same to convert.");

  auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
    Type *InTy = In->getType();
      return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
      return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
    return IRB.CreateBitCast(In, Ty);

      return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
      return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(
          CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                            DL.getIntPtrType(NewTy)),

  return CreateBitCastLike(V, NewTy);
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
  uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()

  Use *U = S.getUse();

    if (MI->isVolatile())
    if (!S.isSplittable())

    if (!II->isLifetimeStartOrEnd() && !II->isDroppable())

    if (LTy->isStructTy())
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy());

    if (SI->isVolatile())
    Type *STy = SI->getValueOperand()->getType();
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {

    bool HaveCommonEltTy, Type *CommonEltTy,
    bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
    VectorType *CommonVecPtrTy, unsigned VScale) {
  if (CandidateTys.empty())

  if (HaveVecPtrTy && !HaveCommonVecPtrTy)

  if (!HaveCommonEltTy && HaveVecPtrTy) {
    CandidateTys.clear();
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
      if (!VTy->getElementType()->isIntegerTy())
          VTy->getContext(), VTy->getScalarSizeInBits())));

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

    llvm::sort(CandidateTys, RankVectorTypesComp);
    CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
                       CandidateTys.end());

      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    CandidateTys.resize(1);

      std::numeric_limits<unsigned short>::max();
        DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

    if (ElementSize % 8)
    assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
           "vector size not a multiple of element size?");

    for (const Slice &S : P)
    for (const Slice *S : P.splitSliceTails())

  return VTy != CandidateTys.end() ? *VTy : nullptr;

    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;

  for (Type *Ty : OtherTys) {
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

    for (VectorType *const VTy : CandidateTysCopy) {
      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
        CheckCandidateType(NewVTy);

      P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
      HaveCommonVecPtrTy, CommonVecPtrTy, VScale);

  Type *CommonEltTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (!CandidateTys.empty()) {
      if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
          DL.getTypeSizeInBits(V).getFixedValue()) {
        CandidateTys.clear();

      Type *EltTy = VTy->getElementType();
        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;

  for (const Slice &S : P) {
      Ty = SI->getValueOperand()->getType();

    auto CandTy = Ty->getScalarType();
    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                  S.endOffset() != P.endOffset())) {

    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);

          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
          HaveCommonVecPtrTy, CommonVecPtrTy, VScale))

  CandidateTys.clear();
      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
      CommonVecPtrTy, VScale);
                                            bool &WholeAllocaOp) {

  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

    if (II->isLifetimeStartOrEnd() || II->isDroppable())

    if (S.beginOffset() < AllocBeginOffset)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())

    TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);

    if (S.beginOffset() < AllocBeginOffset)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    if (!S.isSplittable())

  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();

  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())

  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)
  for (const Slice *S : P.splitSliceTails())

  return WholeAllocaOp;

                             const Twine &Name) {
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");

  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");

             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
    V = IRB.CreateOr(Old, V, Name + ".insert");
                            unsigned EndIndex, const Twine &Name) {
  unsigned NumElements = EndIndex - BeginIndex;

  if (NumElements == VecTy->getNumElements())

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {
  assert(VecTy && "Can only insert a vector into a vector");

    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

         "Too many elements!");
  assert(V->getType() == VecTy && "Vector type mismatch");

    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));

                                  const char *DebugName) {
    Type *EltType = VecType->getElementType();
    if (EltType != NewAIEltTy) {
      unsigned TotalBits =
          VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
      unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
      V = Builder.CreateBitCast(V, NewVecType);
      VecType = NewVecType;
      LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");

  BitcastIfNeeded(V0, VecType0, "V0");
  BitcastIfNeeded(V1, VecType1, "V1");

  unsigned NumElts0 = VecType0->getNumElements();
  unsigned NumElts1 = VecType1->getNumElements();

  if (NumElts0 == NumElts1) {
    for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
      ShuffleMask.push_back(i);

    unsigned SmallSize = std::min(NumElts0, NumElts1);
    unsigned LargeSize = std::max(NumElts0, NumElts1);
    bool IsV0Smaller = NumElts0 < NumElts1;
    Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
    for (unsigned i = 0; i < SmallSize; ++i)
    for (unsigned i = SmallSize; i < LargeSize; ++i)
    ExtendedVec = Builder.CreateShuffleVector(
    LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
    for (unsigned i = 0; i < NumElts0; ++i)
      ShuffleMask.push_back(i);
    for (unsigned i = 0; i < NumElts1; ++i)
      ShuffleMask.push_back(LargeSize + i);

  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
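// The two stored vectors are first bitcast to the new alloca's element type
// when needed; equal element counts concatenate directly, otherwise the
// shorter operand is widened with an extra shufflevector so the final
// two-operand shuffle above can produce the combined value.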
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  friend class InstVisitor<AllocaSliceRewriter, bool>;

  using Base = InstVisitor<AllocaSliceRewriter, bool>;

  const DataLayout &DL;
  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  uint64_t ElementSize;

  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

  SmallSetVector<PHINode *, 8> &PHIUsers;
  SmallSetVector<SelectInst *, 8> &SelectUsers;

  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    Type *AccessTy = IRB.getPtrTy(AddrSpace);
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);

  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallSetVector<PHINode *, 8> &PHIUsers,
                      SmallSetVector<SelectInst *, 8> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
                  DL.getTypeSizeInBits(NewAI.getAllocatedType())
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));

  bool visit(AllocaSlices::const_iterator I) {
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();

    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
                                    Twine(BeginOffset) + ".");

  std::optional<SmallVector<Value *, 4>>
  rewriteTreeStructuredMerge(Partition &P) {
    if (P.splitSliceTails().size() > 0)
      return std::nullopt;

    LoadInst *TheLoad = nullptr;

      uint64_t BeginOffset;
      StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
          : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}

    Type *AllocatedEltTy =
    unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);

    auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
      return FixedVecTy &&
             DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
             !FixedVecTy->getElementType()->isPointerTy();

    for (Slice &S : P) {
        if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
            S.beginOffset() != NewAllocaBeginOffset ||
            S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
          return std::nullopt;

        if (!IsTypeValidForTreeStructuredMerge(
                SI->getValueOperand()->getType()) ||
          return std::nullopt;
        unsigned NumElts = VecTy->getNumElements();
        unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
        if (NumElts * EltSize % AllocatedEltTySize != 0)
          return std::nullopt;
        StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
                                SI->getValueOperand());
        return std::nullopt;
      return std::nullopt;

    if (StoreInfos.size() < 2)
      return std::nullopt;

    llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
      return A.BeginOffset < B.BeginOffset;

    uint64_t ExpectedStart = NewAllocaBeginOffset;
    for (auto &StoreInfo : StoreInfos) {
      uint64_t BeginOff = StoreInfo.BeginOffset;
      uint64_t EndOff = StoreInfo.EndOffset;
      if (BeginOff != ExpectedStart)
        return std::nullopt;
      ExpectedStart = EndOff;

    if (ExpectedStart != NewAllocaEndOffset)
      return std::nullopt;

    BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
    for (auto &StoreInfo : StoreInfos) {
      if (StoreInfo.Store->getParent() != StoreBB)
        return std::nullopt;
      if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
        return std::nullopt;

      dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
             << "\n Ordered stores:\n";
        dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
               << Info.EndOffset << ") \tStore: " << *Info.Store
               << "\tValue: " << *Info.StoredValue << "\n";

    std::queue<Value *> VecElements;

    for (const auto &Info : StoreInfos) {
      VecElements.push(Info.StoredValue);

    while (VecElements.size() > 1) {
      const auto NumElts = VecElements.size();
      for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
        Value *V0 = VecElements.front();
        Value *V1 = VecElements.front();
        VecElements.push(Merged);
      if (NumElts % 2 == 1) {
        Value *V = VecElements.front();
        VecElements.push(V);

    Value *MergedValue = VecElements.front();
    Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
                               TheLoad->getName() + ".sroa.new.load"));

    return DeletedValues;
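    // The queue starts with the stored values in offset order and repeatedly
    // merges adjacent pairs (carrying an odd leftover into the next round),
    // so N contiguous stores collapse into one wide store after roughly
    // log2(N) rounds of shuffles.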
  bool visitInstruction(Instruction &I) {

    assert(IsSplit || BeginOffset == NewBeginOffset);
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    StringRef OldName = OldPtr->getName();
    size_t LastSROAPrefix = OldName.rfind(".sroa.");
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
        OldName = OldName.substr(IndexEnd + 1);
          OldName = OldName.substr(OffsetEnd + 1);
    OldName = OldName.substr(0, OldName.find(".sroa_"));

  Align getSliceAlign() {
                              NewBeginOffset - NewAllocaBeginOffset);

  unsigned getIndex(uint64_t Offset) {
    assert(VecTy && "Can only call getIndex when rewriting a vector");
    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
    uint32_t Index = RelOffset / ElementSize;
    assert(Index * ElementSize == RelOffset);

  void deleteIfTriviallyDead(Value *V) {
      Pass.DeadInsts.push_back(I);

  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");

    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");

  Value *rewriteIntegerLoad(LoadInst &LI) {
    assert(IntTy && "We cannot insert an integer to the alloca");

    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
      IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
             "Can only handle an extract for an overly wide load");
      V = IRB.CreateZExt(V, LI.getType());

  bool visitLoadInst(LoadInst &LI) {

    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)

    bool IsPtrAdjusted = false;
      V = rewriteVectorizedLoadInst(LI);
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&
                DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
          getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
      LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
                                              NewAI.getAlign(), LI.isVolatile(),
      if (LI.isVolatile())
        NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
      if (NewLI->isAtomic())
        NewLI->setAlignment(LI.getAlign());

      copyMetadataForLoad(*NewLI, LI);

        NewLI->setAAMetadata(AATags.adjustForAccess(
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));

      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
          if (AITy->getBitWidth() < TITy->getBitWidth()) {
            V = IRB.CreateZExt(V, TITy, "load.ext");
            if (DL.isBigEndian())
              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

      Type *LTy = IRB.getPtrTy(AS);
          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));

      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});

      IsPtrAdjusted = true;

             "Only integer type loads and stores are split");
      assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
             "Split load isn't smaller than original load");
             "Non-byte-multiple bit width");

      LIIt.setHeadBit(true);
      IRB.SetInsertPoint(LI.getParent(), LIIt);

      Value *Placeholder =
      Placeholder->replaceAllUsesWith(&LI);
      Placeholder->deleteValue();

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,

    if (V->getType() != VecTy) {
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");
      Type *SliceTy = (NumElements == 1)
                          ? ElementTy
                          : FixedVectorType::get(ElementTy, NumElements);
      if (V->getType() != SliceTy)

    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

    Pass.DeadInsts.push_back(&SI);

                                   Store, Store->getPointerOperand(), OrigV,
                                   DL);

  bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
    assert(IntTy && "We cannot extract an integer from the alloca");
    if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;

    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

                                   Store, Store->getPointerOperand(),
                                   Store->getValueOperand(), DL);

    Pass.DeadInsts.push_back(&SI);
  bool visitStoreInst(StoreInst &SI) {
    Value *OldOp = SI.getOperand(1);

    AAMDNodes AATags = SI.getAAMetadata();

      if (V->getType()->isPointerTy())
          Pass.PostPromotionWorklist.insert(AI);

    TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
      assert(V->getType()->isIntegerTy() &&
             "Only integer type loads and stores are split");
      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
             "Non-byte-multiple bit width");
      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);

      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
    if (IntTy && V->getType()->isIntegerTy())
      return rewriteIntegerStore(V, SI, AATags);

    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&
          getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());

          IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
      unsigned AS = SI.getPointerAddressSpace();
      Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());

    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

    if (SI.isVolatile())

    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);

    assert(Size > 0 && "Expected a positive number of bytes.");
          IRB.CreateZExt(V, SplatIntTy, "zext"),

    V = IRB.CreateVectorSplat(NumElements, V, "vsplat");

  bool visitMemSetInst(MemSetInst &II) {

    AAMDNodes AATags = II.getAAMetadata();

      assert(NewBeginOffset == BeginOffset);
      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
      II.setDestAlignment(getSliceAlign());
               "AT: Unexpected link to non-const GEP");
      deleteIfTriviallyDead(OldPtr);

    Pass.DeadInsts.push_back(&II);

    const bool CanContinue = [&]() {
      if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
      const uint64_t Len = C->getLimitedValue();
      if (Len > std::numeric_limits<unsigned>::max())
      auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
             DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

      Type *SizeTy = II.getLength()->getType();
      unsigned Sz = NewEndOffset - NewBeginOffset;
          getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
          MaybeAlign(getSliceAlign()), II.isVolatile()));
                                 New, New->getRawDest(), nullptr, DL);

      assert(ElementTy == ScalarTy);

      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");

          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
      if (NumElements > 1)

      uint64_t Size = NewEndOffset - NewBeginOffset;
      V = getIntegerSplat(II.getValue(), Size);

      if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                    EndOffset != NewAllocaBeginOffset)) {
        uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
        assert(V->getType() == IntTy &&
               "Wrong type for an alloca wide integer!");

      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);

      V = getIntegerSplat(II.getValue(),
                          DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);

    Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
        IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                           LLVMContext::MD_access_group});

                                 New, New->getPointerOperand(), V, DL);

    return !II.isVolatile();
  bool visitMemTransferInst(MemTransferInst &II) {

    AAMDNodes AATags = II.getAAMetadata();

    bool IsDest = &II.getRawDestUse() == OldUse;
    assert((IsDest && II.getRawDest() == OldPtr) ||
           (!IsDest && II.getRawSource() == OldPtr));

    Align SliceAlign = getSliceAlign();

    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
            DbgAssign->getAddress() == II.getDest())
          DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
        II.setDest(AdjustedPtr);
        II.setDestAlignment(SliceAlign);
        II.setSource(AdjustedPtr);
        II.setSourceAlignment(SliceAlign);

      deleteIfTriviallyDead(OldPtr);

        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

    if (EmitMemCpy && &OldAI == &NewAI) {
      assert(NewBeginOffset == BeginOffset);

      if (NewEndOffset != EndOffset)
        II.setLength(NewEndOffset - NewBeginOffset);

      Pass.DeadInsts.push_back(&II);

    Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
    if (AllocaInst *AI =
      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);

    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
        (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      Type *SizeTy = II.getLength()->getType();
      Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

      Value *DestPtr, *SrcPtr;
      MaybeAlign DestAlign, SrcAlign;
        DestAlign = SliceAlign;
        SrcAlign = OtherAlign;
        DestAlign = OtherAlign;
        SrcAlign = SliceAlign;
      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
                                   &II, New, DestPtr, nullptr, DL);
            SliceSize * 8, &II, New, DestPtr, nullptr, DL);

    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;
    uint64_t Size = NewEndOffset - NewBeginOffset;
    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;
    IntegerType *SubIntTy =
        IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
        OtherTy = VecTy->getElementType();
    } else if (IntTy && !IsWholeAlloca) {
      OtherTy = NewAllocaTy;

    MaybeAlign SrcAlign = OtherAlign;
    MaybeAlign DstAlign = SliceAlign;

      DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
      SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());

    if (VecTy && !IsWholeAlloca && !IsDest) {
    } else if (IntTy && !IsWholeAlloca && !IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
      LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                             II.isVolatile(), "copyload");
      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});

    if (VecTy && !IsWholeAlloca && IsDest) {
    } else if (IntTy && !IsWholeAlloca && IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
                                                 Src->getType(), DL));

                                   Store, DstPtr, Src, DL);
            &II, Store, DstPtr, Src, DL);

    return !II.isVolatile();
  bool visitIntrinsicInst(IntrinsicInst &II) {
    assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
           "Unexpected intrinsic!");

    Pass.DeadInsts.push_back(&II);

    if (II.isDroppable()) {
      assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");

    assert(II.getArgOperand(0) == OldPtr);

    if (II.getIntrinsicID() == Intrinsic::lifetime_start)
      New = IRB.CreateLifetimeStart(Ptr);
      New = IRB.CreateLifetimeEnd(Ptr);

  void fixLoadStoreAlign(Instruction &Root) {
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<Instruction *, 4> Uses;
    Uses.push_back(&Root);

        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));

      for (User *U : I->users())
    } while (!Uses.empty());

  bool visitPHINode(PHINode &PN) {
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    IRBuilderBase::InsertPointGuard Guard(IRB);
                       OldPtr->getParent()->getFirstInsertionPt());
      IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(PN);

  bool visitSelectInst(SelectInst &SI) {
    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(SI);
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  friend class InstVisitor<AggLoadStoreRewriter, bool>;

  SmallPtrSet<User *, 8> Visited;

  const DataLayout &DL;

  AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
      : DL(DL), IRB(IRB) {}

  bool rewrite(Instruction &I) {
    while (!Queue.empty()) {
      U = Queue.pop_back_val();

  void enqueueUsers(Instruction &I) {
    for (Use &U : I.uses())
      if (Visited.insert(U.getUser()).second)
        Queue.push_back(&U);

  bool visitInstruction(Instruction &I) { return false; }

  template <typename Derived> class OpSplitter {

    SmallVector<unsigned, 4> Indices;

    const DataLayout &DL;

    OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
               Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
        : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);

    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
        unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
        return static_cast<Derived *>(this)->emitFunc(

        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {

    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                   AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

        Load->setAAMetadata(

      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

    void recordFakeUses(LoadInst &LI) {
      for (Use &U : LI.uses())
          if (II->getIntrinsicID() == Intrinsic::fake_use)

    void emitFakeUses() {
      for (Instruction *I : FakeUses) {
        IRB.SetInsertPoint(I);
        for (auto *V : Components)
          IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
        I->eraseFromParent();

  bool visitLoadInst(LoadInst &LI) {
    Splitter.recordFakeUses(LI);
    Splitter.emitFakeUses();

  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
                    const DataLayout &DL, IRBuilderTy &IRB)
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
          AATags(AATags), AggStore(AggStore) {}

    StoreInst *AggStore;

    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

        uint64_t SizeInBits =
            DL.getTypeSizeInBits(Store->getValueOperand()->getType());
            SizeInBits, AggStore, Store,
            Store->getPointerOperand(), Store->getValueOperand(),
               "AT: unexpected debug.assign linked to store through "

  bool visitStoreInst(StoreInst &SI) {
    if (!SI.isSimple() || SI.getPointerOperand() != *U)
    if (V->getType()->isSingleValueType())

    StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");

    SI.eraseFromParent();
  bool visitBitCastInst(BitCastInst &BC) { /* ... */ }

  bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) { /* ... */ }
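  /// unfoldGEPSelect rewrites a GEP whose operand is a select (or a zext of an
  /// i1, which is treated as selecting between the constants 0 and 1) into two
  /// GEPs feeding a new select, so the resulting pointers can be traced back
  /// to the underlying allocas.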
  bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
    // ...
    if (!ZI->getSrcTy()->isIntegerTy(1))
      return false;
    // ...
    LLVM_DEBUG(dbgs() << "  original: " << *Sel << "\n";
               dbgs() << "            " << GEPI << "\n";);

    auto GetNewOps = [&](Value *SelOp) {
      // ...
    };
    // ...
    Cond = SI->getCondition();
    True = SI->getTrueValue();
    False = SI->getFalseValue();
    // ...
    Cond = Sel->getOperand(0);
    True = ConstantInt::get(Sel->getType(), 1);
    False = ConstantInt::get(Sel->getType(), 0);
    // ...
    IRB.SetInsertPoint(&GEPI);
    // ...
    Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
                                 True->getName() + ".sroa.gep", NW);
    Value *NFalse =
        IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
                      False->getName() + ".sroa.gep", NW);

    Value *NSel = MDFrom
                      ? IRB.CreateSelect(Cond, NTrue, NFalse,
                                         Sel->getName() + ".sroa.sel", MDFrom)
                      : IRB.CreateSelectWithUnknownProfile(
                            /* ... */
                            Sel->getName() + ".sroa.sel");
    Visited.erase(&GEPI);
    // ...
    enqueueUsers(*NSelI);

    LLVM_DEBUG(dbgs() << "          " << *NFalse << "\n";
               dbgs() << "          " << *NSel << "\n";);
    // ...
  }
  bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
    // ...
    auto IsInvalidPointerOperand = [](Value *V) {
      // ...
      return !AI->isStaticAlloca();
      // ...
    };
    if (any_of(Phi->operands(), IsInvalidPointerOperand))
      return false;
    // ...
    if (/* ... */ [](Value *V) { return isa<ConstantInt>(V); }))
      return false;
    // ...
    LLVM_DEBUG(dbgs() << "  original: " << *Phi << "\n";
               dbgs() << "            " << GEPI << "\n";);

    auto GetNewOps = [&](Value *PhiOp) {
      // ...
    };

    IRB.SetInsertPoint(Phi);
    PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
                                    Phi->getName() + ".sroa.phi");
    // ...
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
      // ...
      IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
                    /* ... */);
      // ...
    }
    // ...
    Visited.erase(&GEPI);
    // ...
    enqueueUsers(*NewPhi);
    // ...
    LLVM_DEBUG(dbgs() << "\n          " << *NewPhi << '\n');
    // ...
  }
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (unfoldGEPSelect(GEPI))
      return true;
    if (unfoldGEPPhi(GEPI))
      return true;
    // ...
  }

  bool visitPHINode(PHINode &PN) { /* ... */ }

  bool visitSelectInst(SelectInst &SI) { /* ... */ }
static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty) {
  if (Ty->isSingleValueType())
    return Ty;

  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
  // ...
  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    InnerTy = ArrTy->getElementType();
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    // ...
    InnerTy = STy->getElementType(Index);
  } else {
    return Ty;
  }

  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
    return Ty;

  return stripAggregateTypeWrapping(DL, InnerTy);
}

static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset,
                              uint64_t Size) {
  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
    return stripAggregateTypeWrapping(DL, Ty);
  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
    return nullptr;

  // Array / vector element handling.
  // ...
  ElementTy = AT->getElementType();
  TyNumElements = AT->getNumElements();
  // ...
  ElementTy = VT->getElementType();
  TyNumElements = VT->getNumElements();
  // ...
  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  // ...
  if (NumSkippedElements >= TyNumElements)
    return nullptr;
  Offset -= NumSkippedElements * ElementSize;
  // ...
  if (Size == ElementSize)
    return stripAggregateTypeWrapping(DL, ElementTy);
  // ...
  if (NumElements * ElementSize != Size)
    return nullptr;
  // ...

  // Struct field handling (via the StructLayout).
  // ...
  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  if (Offset >= ElementSize)
    return nullptr;
  // ...
  if (Size == ElementSize)
    return stripAggregateTypeWrapping(DL, ElementTy);
  // ...
  if (Index == EndIndex)
    return nullptr;
  // ...
  assert(Index < EndIndex);
  // ...
}
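/// presplitLoadsAndStores pre-splits integer loads and stores that straddle
/// partition boundaries into partition-sized pieces so each partition can be
/// rewritten and promoted independently. A load/store pair is only split when
/// both sides share exactly the same relative split offsets; otherwise the
/// candidates are dropped as unsplittable.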
bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
  // ...
  struct SplitOffsets {
    // ...
    std::vector<uint64_t> Splits;
  };
  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
  // ...
  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

  LLVM_DEBUG(dbgs() << "  Searching for candidate loads and stores\n");
  for (auto &P : AS.partitions()) {
    for (Slice &S : P) {
      // ...
      if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
        // ...
        UnsplittableLoads.insert(LI);
        // ...
        UnsplittableLoads.insert(LI);
        // ...
      }
      assert(P.endOffset() > S.beginOffset() &&
             "Empty or backwards partition!");
      // ...
      auto IsLoadSimplyStored = [](LoadInst *LI) {
        for (User *LU : LI->users()) {
          // ...
          if (!SI || !SI->isSimple())
            return false;
        }
        return true;
      };
      if (!IsLoadSimplyStored(LI)) {
        UnsplittableLoads.insert(LI);
        // ...
      }
      // ...
      if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
        continue;
      // ...
      if (!StoredLoad || !StoredLoad->isSimple())
        continue;
      assert(!SI->isVolatile() && "Cannot split volatile stores!");
      // ...
      auto &Offsets = SplitOffsetsMap[I];
      assert(/* ... */ &&
             "Should not have splits the first time we see an instruction!");
      // ...
      Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
    }

    // ...
    for (Slice *S : P.splitSliceTails()) {
      auto SplitOffsetsMapI = /* ... */;
      if (SplitOffsetsMapI == SplitOffsetsMap.end())
        continue;
      auto &Offsets = SplitOffsetsMapI->second;
      assert(/* ... */ &&
             "Cannot have an empty set of splits on the second partition!");
      assert(/* ... */ == P.beginOffset() - Offsets.S->beginOffset() &&
             "Previous split does not end where this one begins!");
      // ...
      if (S->endOffset() > P.endOffset())
        // ...
    }
  }

  // ...
  llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
    // ...
    if (UnsplittableLoads.count(LI))
      return true;
    auto LoadOffsetsI = SplitOffsetsMap.find(LI);
    if (LoadOffsetsI == SplitOffsetsMap.end())
      return false;
    auto &LoadOffsets = LoadOffsetsI->second;
    // ...
    auto &StoreOffsets = SplitOffsetsMap[SI];
    // ...
    if (LoadOffsets.Splits == StoreOffsets.Splits)
      return false;
    LLVM_DEBUG(dbgs() /* ... */ << "      " << *LI << "\n"
                                << "      " << *SI << "\n");
    // ...
    UnsplittableLoads.insert(LI);
    return true;
  });
  // ...
  llvm::erase_if(Stores, [&UnsplittableLoads](StoreInst *SI) {
    // ...
    return UnsplittableLoads.count(LI);
  });
  llvm::erase_if(Loads, [&UnsplittableLoads](LoadInst *LI) {
    return UnsplittableLoads.count(LI);
  });
  // ...
  IRBuilderTy IRB(&AI);
  // ...
  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
  // ...
  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
  std::vector<LoadInst *> SplitLoads;
  const DataLayout &DL = AI.getDataLayout();
  for (LoadInst *LI : Loads) {
    SplitLoads.clear();
    // ...
    auto &Offsets = SplitOffsetsMap[LI];
    unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
    assert(/* ... */ && "Load must have type size equal to store size");
    assert(/* ... */ && "Load must be >= slice size");

    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + SliceSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");
    // ...
    IRB.SetInsertPoint(LI);
    // ...
    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    // ...
    auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
    // ...
    LoadInst *PLoad = IRB.CreateAlignedLoad(
        PartTy,
        getAdjustedPtr(IRB, DL, BasePtr,
                       APInt(DL.getIndexSizeInBits(AS), PartOffset),
                       PartPtrTy, BasePtr->getName() + "."),
        /* ... */);
    PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
    // ...
    SplitLoads.push_back(PLoad);
    // ...
    NewSlices.push_back(
        Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
              /* ... */));
    LLVM_DEBUG(dbgs() /* ... */ << ", " << NewSlices.back().endOffset()
                                << "): " << *PLoad << "\n");
    // ...
    PartOffset = Offsets.Splits[Idx];
    // ...
    PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
    // ...

    bool DeferredStores = false;
    for (User *LU : LI->users()) {
      // ...
      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
        DeferredStores = true;
        // ...
      }

      Value *StoreBasePtr = SI->getPointerOperand();
      IRB.SetInsertPoint(SI);
      AAMDNodes AATags = SI->getAAMetadata();

      LLVM_DEBUG(dbgs() << "    Splitting store of load: " << *SI << "\n");

      for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
        LoadInst *PLoad = SplitLoads[Idx];
        uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
        auto *PartPtrTy = SI->getPointerOperandType();
        // ...
        auto AS = SI->getPointerAddressSpace();
        StoreInst *PStore = IRB.CreateAlignedStore(
            PLoad,
            getAdjustedPtr(IRB, DL, StoreBasePtr,
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           PartPtrTy, StoreBasePtr->getName() + "."),
            /* ... */);
        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                   LLVMContext::MD_access_group,
                                   LLVMContext::MD_DIAssignID});
        // ...
        LLVM_DEBUG(dbgs() << "      +" << PartOffset << ":" << *PStore << "\n");
      }
      // ...
      ResplitPromotableAllocas.insert(OtherAI);
      Worklist.insert(OtherAI);
      // ...
      Worklist.insert(OtherAI);
      // ...
      DeadInsts.push_back(SI);
    }
    // ...
    if (DeferredStores)
      SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
    // ...
    DeadInsts.push_back(LI);
    // ...
  }
  // ...
  for (StoreInst *SI : Stores) {
    // ...
    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
    assert(/* ... */ && "Slice size should always match load size exactly!");
    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");
    // ...
    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
      assert(/* ... */ &&
             "Too few split loads for the number of splits in the store!");
    }
    // ...
    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    // ...
    auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
    // ...
    auto *StorePartPtrTy = SI->getPointerOperandType();
    // ...
    if (SplitLoads) {
      PLoad = (*SplitLoads)[Idx];
    } else {
      IRB.SetInsertPoint(LI);
      // ...
      PLoad = IRB.CreateAlignedLoad(
          PartTy,
          getAdjustedPtr(IRB, DL, LoadBasePtr,
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         LoadPartPtrTy, LoadBasePtr->getName() + "."),
          /* ... */);
      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});
    }
    // ...
    IRB.SetInsertPoint(SI);
    auto AS = SI->getPointerAddressSpace();
    StoreInst *PStore = IRB.CreateAlignedStore(
        PLoad,
        getAdjustedPtr(IRB, DL, StoreBasePtr,
                       APInt(DL.getIndexSizeInBits(AS), PartOffset),
                       StorePartPtrTy, StoreBasePtr->getName() + "."),
        /* ... */);
    PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});
    // ...
    NewSlices.push_back(
        Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
              /* ... */));
    LLVM_DEBUG(dbgs() /* ... */ << ", " << NewSlices.back().endOffset()
                                << "): " << *PStore << "\n");
    // ...
    PartOffset = Offsets.Splits[Idx];
    // ...
    PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
    // ...
    assert(OtherAI != &AI && "We can't re-split our own alloca!");
    ResplitPromotableAllocas.insert(OtherAI);
    Worklist.insert(OtherAI);
    // ...
    assert(OtherAI != &AI && "We can't re-split our own alloca!");
    Worklist.insert(OtherAI);
    // ...
    DeadInsts.push_back(LI);
    // ...
    DeadInsts.push_back(SI);
    // ...
  }
  // ...
  AS.insert(NewSlices);
  // ...
  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
    // ...
  // ...
  PromotableAllocas.set_subtract(ResplitPromotableAllocas);
  // ...
  return true;
}
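/// rewritePartition rewrites all slices of a single partition onto a new (or
/// reused) alloca. It picks a partition type (a common used type, a legal
/// integer, or an i8 array as a fallback), runs AllocaSliceRewriter over the
/// slices, and records whether the result is promotable, possibly after
/// speculating PHIs and rewriting selects collected during the rewrite.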
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
                                   Partition &P) {
  // ...
  Type *SliceTy = nullptr;
  const DataLayout &DL = AI.getDataLayout();
  unsigned VScale = AI.getFunction()->getVScaleValue();

  std::pair<Type *, IntegerType *> CommonUseTy = /* ... */;
  // ...
  if (CommonUseTy.first) {
    TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
    // ...
    SliceTy = CommonUseTy.first;
    // ...
  }
  // ...
  if (Type *TypePartitionTy = getTypePartition(DL, AI.getAllocatedType(),
                                               P.beginOffset(), P.size()))
    SliceTy = TypePartitionTy;
  if (!SliceTy && CommonUseTy.second)
    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size())
      SliceTy = CommonUseTy.second;
  if ((!SliceTy || (SliceTy->isArrayTy() && /* ... */)) &&
      DL.isLegalInteger(P.size() * 8))
    SliceTy = Type::getIntNTy(*C, P.size() * 8);
  // ...
  if (!SliceTy)
    SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
  assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
  // ...
  AllocaInst *NewAI;
  if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
    NewAI = &AI;
    // ...
  } else {
    // ...
    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
    NewAI = new AllocaInst(
        SliceTy, AI.getAddressSpace(), nullptr,
        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
        /* ... */);
    // ...
  }

  LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
                    << "," << P.endOffset() << ") to: " << *NewAI << "\n");
  // ...
  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;
  SmallSetVector<PHINode *, 8> PHIUsers;
  SmallSetVector<SelectInst *, 8> SelectUsers;

  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
    NumUses += DeletedValues->size() + 1;
    for (Value *V : *DeletedValues)
      DeadInsts.push_back(V);
  } else {
    for (Slice *S : P.splitSliceTails()) {
      // ...
    }
    for (Slice &S : P) {
      // ...
    }
  }

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);
  // ...
  for (PHINode *PHI : PHIUsers)
    if (!isSafePHIToSpeculate(*PHI)) {
      Promotable = false;
      PHIUsers.clear();
      SelectUsers.clear();
      break;
    }
  // ...
  SmallVector<std::pair<SelectInst *, RewriteableMemOps>, 2>
      NewSelectsToRewrite;
  // ...
  for (SelectInst *Sel : SelectUsers) {
    std::optional<RewriteableMemOps> Ops =
        isSafeSelectToSpeculate(*Sel, PreserveCFG);
    if (!Ops) {
      Promotable = false;
      PHIUsers.clear();
      SelectUsers.clear();
      NewSelectsToRewrite.clear();
      break;
    }
    // ...
  }

  if (Promotable) {
    for (Use *U : AS.getDeadUsesIfPromotable()) {
      // ...
      Value::dropDroppableUse(*U);
      // ...
      DeadInsts.push_back(OldInst);
    }
    if (PHIUsers.empty() && SelectUsers.empty()) {
      // ...
      PromotableAllocas.insert(NewAI);
    } else {
      // ...
      SpeculatablePHIs.insert_range(PHIUsers);
      SelectsToRewrite.reserve(SelectsToRewrite.size() +
                               NewSelectsToRewrite.size());
      for (auto &&KV : llvm::make_range(
               std::make_move_iterator(NewSelectsToRewrite.begin()),
               std::make_move_iterator(NewSelectsToRewrite.end())))
        SelectsToRewrite.insert(std::move(KV));
      Worklist.insert(NewAI);
    }
  } else {
    // ...
    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();
    // ...
    Worklist.insert(NewAI);
    // ...
  }

  return NewAI;
}
static DIExpression *createOrReplaceFragment(const DIExpression *Expr,
                                             DIExpression::FragmentInfo Frag,
                                             int64_t BitExtractOffset) {
  // ...
  bool HasFragment = false;
  bool HasBitExtract = false;
  // ...
      HasBitExtract = true;
      int64_t ExtractOffsetInBits = Op.getArg(0);
      int64_t ExtractSizeInBits = Op.getArg(1);
      // ...
      assert(BitExtractOffset <= 0);
      int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
      // ...
      if (AdjustedOffset < 0)
        // ...
      Ops.push_back(Op.getOp());
      Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
      Ops.push_back(ExtractSizeInBits);
      // ...
    Op.appendToVector(Ops);
  // ...
  if (HasFragment && HasBitExtract)
    // ...
  if (!HasBitExtract) {
    // ...
  }
  // ...
}

static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig,
                             AllocaInst *NewAddr, DIExpression *NewAddrExpr,
                             Instruction *BeforeInst,
                             std::optional<DIExpression::FragmentInfo> NewFragment,
                             int64_t BitExtractAdjustment) {
  // ...
  //                                       BitExtractAdjustment);
  if (!NewFragmentExpr)
    return;
  // ...
  BeforeInst->getParent()->insertDbgRecordBefore(DVR, /* ... */);
  // ...
  BeforeInst->getParent()->insertDbgRecordBefore(DVR, /* ... */);
  // ...
  if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
    // ...
  }
  // ...
  LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
  // ...
}
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
  if (AS.begin() == AS.end())
    return false;

  unsigned NumPartitions = 0;
  bool Changed = false;
  const DataLayout &DL = AI.getModule()->getDataLayout();

  // ...
  Changed |= presplitLoadsAndStores(AI, AS);
  // ...
  bool IsSorted = true;

  uint64_t AllocaSize =
      DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {
    // ...
    SmallBitVector SplittableOffset(AllocaSize + 1, true);
    // ...
      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);

    for (Slice &S : AS) {
      if (!S.isSplittable())
        continue;
      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
        continue;
      // ...
      S.makeUnsplittable();
      // ...
    }
  } else {
    for (Slice &S : AS) {
      if (!S.isSplittable())
        continue;
      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
        continue;
      // ...
      S.makeUnsplittable();
      // ...
    }
  }
  // ...
  struct Fragment {
    AllocaInst *Alloca;
    uint64_t Offset;
    uint64_t Size;
    Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
        : Alloca(AI), Offset(O), Size(S) {}
  };
  SmallVector<Fragment, 4> Fragments;

  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
      Changed = true;
      // ...
      uint64_t SizeOfByte = 8;
      uint64_t AllocaSize =
          DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
      uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
      Fragments.push_back(
          Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
    }
    ++NumPartitions;
  }

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);

  // ...
  auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
    // ...
    const Value *DbgPtr = DbgVariable->getAddress();
    DIExpression::FragmentInfo VarFrag =
        DbgVariable->getFragmentOrEntireVariable();
    // ...
    int64_t CurrentExprOffsetInBytes = 0;
    SmallVector<uint64_t> PostOffsetOps;
    if (!DbgVariable->getExpression()
             ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
      return;
    // ...
    int64_t ExtractOffsetInBits = 0;
    // ...
        ExtractOffsetInBits = Op.getArg(0);
    // ...
    DIBuilder DIB(*AI.getModule(), /*AllowUnresolved*/ false);
    for (auto Fragment : Fragments) {
      int64_t OffsetFromLocationInBits;
      std::optional<DIExpression::FragmentInfo> NewDbgFragment;
      // ...
      if (!DIExpression::calculateFragmentIntersect(
              DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
              CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
              NewDbgFragment, OffsetFromLocationInBits))
        continue;
      // ...
      if (NewDbgFragment && !NewDbgFragment->SizeInBits)
        continue;
      // ...
      if (!NewDbgFragment)
        NewDbgFragment = DbgVariable->getFragment();
      // ...
      int64_t OffestFromNewAllocaInBits =
          OffsetFromLocationInBits - ExtractOffsetInBits;
      // ...
      int64_t BitExtractOffset =
          std::min<int64_t>(0, OffestFromNewAllocaInBits);
      // ...
      OffestFromNewAllocaInBits =
          std::max(int64_t(0), OffestFromNewAllocaInBits);
      // ...
      DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
      if (OffestFromNewAllocaInBits > 0) {
        int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
        // ...
      }
      // ...
      auto RemoveOne = [DbgVariable](auto *OldDII) {
        auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
          return LHS->getVariable() == RHS->getVariable() &&
                 LHS->getDebugLoc()->getInlinedAt() ==
                     RHS->getDebugLoc()->getInlinedAt();
        };
        if (SameVariableFragment(OldDII, DbgVariable))
          OldDII->eraseFromParent();
      };
      // ...
      insertNewDbgInst(DIB, DbgVariable, Fragment.Alloca, NewExpr, &AI,
                       NewDbgFragment, BitExtractOffset);
    }
  };
  // ...
  return Changed;
}
void SROA::clobberUse(Use &U) {
  // ...
  DeadInsts.push_back(OldI);
  // ...
}
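/// propagateStoredValuesToLoads handles allocas whose address only escapes as
/// read-only: when every access to a slice range is a simple load or store of
/// the same type, the stored values are forwarded directly to the loads using
/// SSAUpdater (via BasicLoadAndStorePromoter) without rewriting the alloca.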
bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
  // ...
  LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
  bool AllSameAndValid = true;
  Type *PartitionType = nullptr;
  // ...
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;
  // ...
  auto Flush = [&]() {
    if (AllSameAndValid && !Insts.empty()) {
      LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
                        << EndOffset << ")\n");
      // ...
      SSAUpdater SSA(&NewPHIs);
      // ...
      BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
      Promoter.run(Insts);
      // ...
    }
    AllSameAndValid = true;
    PartitionType = nullptr;
    // ...
  };

  for (Slice &S : AS) {
    // ...
    LLVM_DEBUG({
      dbgs() << "Ignoring slice: ";
      AS.print(dbgs(), &S);
    });
    // ...
    if (S.beginOffset() >= EndOffset) {
      // ...
      BeginOffset = S.beginOffset();
      EndOffset = S.endOffset();
    } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
      if (AllSameAndValid) {
        LLVM_DEBUG({
          dbgs() << "Slice does not match range [" << BeginOffset << ", "
                 << EndOffset << ")";
          AS.print(dbgs(), &S);
        });
        AllSameAndValid = false;
      }
      EndOffset = std::max(EndOffset, S.endOffset());
    }
    // ...
    if (auto *LI = dyn_cast<LoadInst>(/* ... */)) {
      // ...
      if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
      // ...
    } else if (auto *SI = dyn_cast<StoreInst>(/* ... */)) {
      Type *UserTy = SI->getValueOperand()->getType();
      if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
      // ...
    } else {
      AllSameAndValid = false;
    }
  }
  // ...
  Flush();
  return true;
}
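/// runOnAlloca analyzes and rewrites a single alloca: it first splits
/// aggregate (FCA) loads and stores with AggLoadStoreRewriter, builds the
/// slices of the alloca, clobbers and queues dead users, splits the alloca by
/// partitions, and then speculates the recorded PHIs and rewrites the recorded
/// selects. It returns whether the IR and whether the CFG changed.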
std::pair<bool /*Changed*/, bool /*CFGChanged*/>
SROA::runOnAlloca(AllocaInst &AI) {
  bool Changed = false;
  bool CFGChanged = false;
  // ...
  ++NumAllocasAnalyzed;

  if (AI.use_empty()) {
    AI.eraseFromParent();
    // ...
  }
  const DataLayout &DL = AI.getDataLayout();
  // ...
  auto *AT = AI.getAllocatedType();
  TypeSize Size = DL.getTypeAllocSize(AT);
  if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
      Size.getFixedValue() == 0)
    // ...

  IRBuilderTy IRB(&AI);
  AggLoadStoreRewriter AggRewriter(DL, IRB);
  Changed |= AggRewriter.rewrite(AI);
  // ...
  AllocaSlices AS(DL, AI);
  // ...
  if (AS.isEscapedReadOnly()) {
    Changed |= propagateStoredValuesToLoads(AI, AS);
    // ...
  }
  // ...
  for (Instruction *DeadUser : AS.getDeadUsers()) {
    // ...
    for (Use &DeadOp : DeadUser->operands())
      // ...
    DeadInsts.push_back(DeadUser);
    // ...
  }
  for (Use *DeadOp : AS.getDeadOperands()) {
    clobberUse(*DeadOp);
    // ...
  }
  // ...
  if (AS.begin() == AS.end())
    // ...

  Changed |= splitAlloca(AI, AS);
  // ...
  while (!SpeculatablePHIs.empty())
    // ...

  auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
  while (!RemainingSelectsToRewrite.empty()) {
    const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
    // ...
  }
  // ...
  return {Changed, CFGChanged};
}
bool SROA::deleteDeadInstructions(
    SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
  bool Changed = false;
  while (!DeadInsts.empty()) {
    // ...
    DeletedAllocas.insert(AI);
    // ...
    OldDII->eraseFromParent();
    // ...
    for (Use &Operand : I->operands())
      // ...
      DeadInsts.push_back(U);
    // ...
    I->eraseFromParent();
    Changed = true;
  }
  return Changed;
}
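/// promoteAllocas runs mem2reg (PromoteMemToReg) over every alloca that the
/// rewrite phase proved promotable.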
bool SROA::promoteAllocas() {
  if (PromotableAllocas.empty())
    return false;
  // ...
  NumPromoted += PromotableAllocas.size();
  PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
  PromotableAllocas.clear();
  return true;
}
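/// runSROA is the per-function driver: it seeds the worklist with allocas from
/// the entry block, repeatedly runs runOnAlloca, deletes dead instructions,
/// and promotes allocas, then re-processes the post-promotion worklist until
/// no work remains.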
std::pair<bool /*Changed*/, bool /*CFGChanged*/> SROA::runSROA(Function &F) {
  // ...
  const DataLayout &DL = F.getDataLayout();
  // ...
  if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
      isAllocaPromotable(AI))
    PromotableAllocas.insert(AI);
  else
    Worklist.insert(AI);
  // ...
  bool Changed = false;
  bool CFGChanged = false;
  // ...
  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

  do {
    while (!Worklist.empty()) {
      auto [IterationChanged, IterationCFGChanged] =
          runOnAlloca(*Worklist.pop_back_val());
      Changed |= IterationChanged;
      CFGChanged |= IterationCFGChanged;

      Changed |= deleteDeadInstructions(DeletedAllocas);
      // ...
      if (!DeletedAllocas.empty()) {
        Worklist.set_subtract(DeletedAllocas);
        PostPromotionWorklist.set_subtract(DeletedAllocas);
        PromotableAllocas.set_subtract(DeletedAllocas);
        DeletedAllocas.clear();
      }
    }
    // ...
    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());

  assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
  assert((!CFGChanged || !PreserveCFG) &&
         "Should not have modified the CFG when told to preserve it.");
  // ...
  for (auto &BB : F) {
    RemoveRedundantDbgInstrs(&BB);
  }
  // ...
  return {Changed, CFGChanged};
}
PreservedAnalyses SROAPass::run(Function &F, FunctionAnalysisManager &AM) {
  // ...
  auto [Changed, CFGChanged] =
      SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
  // ...
}

void SROAPass::printPipeline(
    raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
  static_cast<PassInfoMixin<SROAPass> *>(this)->printPipeline(
      OS, MapClassName2PassName);
  // ...
}

// Legacy pass manager wrapper.
class SROALegacyPass : public FunctionPass {
  // ...
  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    // ...
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    // ...
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // ...
  }

  StringRef getPassName() const override { return "SROA"; }
};

char SROALegacyPass::ID = 0;

// ...
INITIALIZE_PASS_END(SROALegacyPass, "sroa",
                    "Scalar Replacement Of Aggregates", false, false)