#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "sroa"
STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumDeleted, "Number of instructions deleted");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;
  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }
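  // Slices are ordered primarily by begin offset. Within the same begin
  // offset, unsplittable slices sort before splittable ones and longer
  // slices before shorter ones, so a spanning slice leads its cluster.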
    if (beginOffset() < RHS.beginOffset())
    if (beginOffset() > RHS.beginOffset())
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())

                                              uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;

    return LHSOffset < RHS.beginOffset();

    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
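  // insert() appends the new slices and then restores the sorted order by
  // merging the new tail into the existing slices with std::inplace_merge.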
    int OldSize = Slices.size();
    auto SliceI = Slices.begin() + OldSize;
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());

  class partition_iterator;

    return DeadUseIfPromotable;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  template <typename DerivedT, typename RetT = void> class BuilderBase;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  uint64_t BeginOffset = 0, EndOffset = 0;

    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;

  iterator end() const { return SJ; }

  uint64_t MaxSplitSliceEndOffset = 0;
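  // advance() forms the next partition: it drops split-slice tails that have
  // ended, then either grows an unsplittable region to cover every
  // overlapping slice or gathers a run of overlapping splittable slices.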
    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;

                       [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
                              return S->endOffset() == MaxSplitSliceEndOffset;
               "Could not find the current max split slice offset!");
                              return S->endOffset() <= MaxSplitSliceEndOffset;
               "Max split slice end offset is not actually the max!");

      assert(P.SplitTails.empty() && "Failed to clear the split slices!");

        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);

        P.BeginOffset = P.EndOffset;
        P.EndOffset = MaxSplitSliceEndOffset;

      if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
          !P.SI->isSplittable()) {
        P.BeginOffset = P.EndOffset;
        P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {

      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      P.EndOffset = P.SJ->beginOffset();
578 "End iterators don't match between compared partition iterators!");
585 if (
P.SI == RHS.P.SI &&
P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
587 "Same set of slices formed two different sized partitions!");
588 assert(
P.SplitTails.size() == RHS.P.SplitTails.size() &&
589 "Same slice position with differently sized non-empty split "
613 partition_iterator(
end(),
end()));
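// foldSelectInst() folds a select whose condition is a known constant, or
// whose two operands are identical, down to the surviving operand; otherwise
// it returns nullptr so the select is analyzed as a real use.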
  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
    return PN->hasConstantValue();
  const uint64_t AllocSize;

        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedSize()),

    if (VisitedDeadInsts.insert(&I).second)
      AS.DeadUsers.push_back(&I);

                 bool IsSplittable = false) {

                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      return markAsDead(I);

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                        << Offset << " to remain within the " << AllocSize
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      EndOffset = AllocSize;

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
      return markAsDead(BC);
    return Base::visitBitCastInst(BC);

      return markAsDead(ASC);
    return Base::visitAddrSpaceCastInst(ASC);

      return markAsDead(GEPI);

      ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());

      if (StructType *STy = GTI.getStructTypeOrNull()) {

          DL.getTypeAllocSize(GTI.getIndexedType()).getFixedSize());

      if (GEPOffset.ugt(AllocSize))
        return markAsDead(GEPI);

    return Base::visitGetElementPtrInst(GEPI);
778 "All simple FCA loads should have been pre-split");
781 return PI.setAborted(&LI);
785 return PI.setAborted(&LI);
787 if (isa<ScalableVectorType>(LI.
getType()))
788 return PI.setAborted(&LI);
790 uint64_t
Size =
DL.getTypeStoreSize(LI.
getType()).getFixedSize();
    Value *ValOp = SI.getValueOperand();
      return PI.setEscapedAndAborted(&SI);
      return PI.setAborted(&SI);

    if (SI.isVolatile() &&
        SI.getPointerAddressSpace() != DL.getAllocaAddrSpace())
      return PI.setAborted(&SI);

    if (isa<ScalableVectorType>(ValOp->getType()))
      return PI.setAborted(&SI);

    uint64_t Size = DL.getTypeStoreSize(ValOp->getType()).getFixedSize();

                        << Offset << " which extends past the end of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << SI << "\n");
      return markAsDead(SI);
827 "All simple FCA stores should have been pre-split");
834 if ((Length && Length->
getValue() == 0) ||
835 (IsOffsetKnown &&
Offset.uge(AllocSize)))
837 return markAsDead(II);
840 return PI.setAborted(&II);
845 return PI.setAborted(&II);
848 : AllocSize -
Offset.getLimitedValue(),
854 if (Length && Length->
getValue() == 0)
856 return markAsDead(II);
860 if (VisitedDeadInsts.
count(&II))
864 return PI.setAborted(&II);
871 return PI.setAborted(&II);
878 if (
Offset.uge(AllocSize)) {
880 MemTransferSliceMap.
find(&II);
881 if (MTPI != MemTransferSliceMap.
end())
882 AS.Slices[MTPI->second].kill();
883 return markAsDead(II);
886 uint64_t RawOffset =
Offset.getLimitedValue();
894 return markAsDead(II);
903 std::tie(MTPI, Inserted) =
904 MemTransferSliceMap.
insert(std::make_pair(&II, AS.Slices.size()));
905 unsigned PrevIdx = MTPI->second;
907 Slice &PrevP = AS.Slices[PrevIdx];
911 if (!II.
isVolatile() && PrevP.beginOffset() == RawOffset) {
913 return markAsDead(II);
918 PrevP.makeUnsplittable();
922 insertUse(II,
Offset,
Size, Inserted && Length);
925 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
926 "Map index doesn't point back to a slice with this user.");
935 AS.DeadUseIfPromotable.push_back(U);
940 return PI.setAborted(&II);
955 Base::visitIntrinsicInst(II);
    Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));

      std::tie(UsedI, I) = Uses.pop_back_val();

      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
            DL.getTypeStoreSize(LI->getType()).getFixedSize());

            DL.getTypeStoreSize(Op->getType()).getFixedSize());

        if (!GEP->hasAllZeroIndices())
      } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
                 !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {

      for (User *U : I->users())
        if (Visited.insert(cast<Instruction>(U)).second)
          Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
    } while (!Uses.empty());

    assert(isa<PHINode>(I) || isa<SelectInst>(I));
      return markAsDead(I);
      AS.DeadOperands.push_back(U);

      return PI.setAborted(&I);

      uint64_t &Size = PHIOrSelectSizes[&I];

        return PI.setAborted(UnsafeI);

    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      PointerEscapingInstr(nullptr) {

  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  printSlice(OS, I, Indent);
  printUse(OS, I, Indent);

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";

  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << " A pointer to this alloca escaped by:\n"
       << " " << *PointerEscapingInstr << "\n";

  OS << "Slices of alloca: " << AI << "\n";

#endif // !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
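// findCommonType() walks all slices that cover exactly [B, EndOffset) and
// returns the type every load/store agrees on (if any), together with the
// widest byte-sized integer type seen, which later guides integer widening.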
static std::pair<Type *, IntegerType *>
                         uint64_t EndOffset) {
  bool TyIsCommon = true;

  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
    Use *U = I->getUse();
    if (isa<IntrinsicInst>(*U->getUser()))
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

    Type *UserTy = nullptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
      UserTy = SI->getValueOperand()->getType();

    if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())

    if (!UserTy || (Ty && Ty != UserTy))

  return {TyIsCommon ? Ty : nullptr, ITy};
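// isSafePHIToSpeculate(): loads of a PHI'd pointer may be speculated into
// the predecessor blocks only if no clobbering write sits between the PHI
// and the load, and the value is known safe to load (or already loaded) at
// the end of every predecessor.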
  uint64_t APWidth = DL.getIndexTypeSizeInBits(PN.getType());
  APInt MaxSize(APWidth, 0);
  bool HaveLoad = false;

    LoadInst *LI = dyn_cast<LoadInst>(U);

      if (BBI->mayWriteToMemory())

    uint64_t Size = DL.getTypeStoreSize(LI->getType()).getFixedSize();

  IRBuilderTy PHIBuilder(&PN);
                                        PN.getName() + ".sroa.speculated");

    IRBuilderTy PredBuilder(TI);
        LoadTy, InVal, Alignment,
    ++NumLoadsSpeculated;
      Load->setAAMetadata(AATags);
    InjectedLoads[Pred] = Load;
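  // For selects, both arms must be unconditionally safe to load; each load
  // of the select is then rewritten as two loads plus a select of the loaded
  // values.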
  Value *TValue = SI.getTrueValue();
  Value *FValue = SI.getFalseValue();

  for (User *U : SI.users()) {
    LoadInst *LI = dyn_cast<LoadInst>(U);

  IRBuilderTy IRB(&SI);
  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

  while (!SI.use_empty()) {
    LoadInst *LI = cast<LoadInst>(SI.user_back());

    IRB.SetInsertPoint(LI);
                                  LI->getName() + ".sroa.speculate.load.true");
                                  LI->getName() + ".sroa.speculate.load.false");
    NumLoadsSpeculated += 2;

    Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                                LI->getName() + ".sroa.speculated");

  SI.eraseFromParent();
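// buildGEP() and the getNaturalGEP* helpers try to re-derive a pointer to a
// given offset by walking the type structure (arrays, vectors, structs)
// rather than emitting raw i8 pointer arithmetic, falling back only when no
// natural indexing path reaches the target type.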
                       const Twine &NamePrefix) {
  if (Indices.empty())

  if (Indices.size() == 1 && cast<ConstantInt>(Indices.back())->isZero())

  return IRB.CreateInBoundsGEP(BasePtr->getType()->getPointerElementType(),
                               BasePtr, Indices, NamePrefix + "sroa_idx");

                                 const Twine &NamePrefix) {
    return buildGEP(IRB, BasePtr, Indices, NamePrefix);

  unsigned OffsetSize = DL.getIndexTypeSizeInBits(BasePtr->getType());

  unsigned NumLayers = 0;
  Type *ElementTy = Ty;
    if (ArrayType *ArrayTy = dyn_cast<ArrayType>(ElementTy)) {
      ElementTy = ArrayTy->getElementType();
      Indices.push_back(IRB.getIntN(OffsetSize, 0));
    } else if (VectorType *VectorTy = dyn_cast<VectorType>(ElementTy)) {
      ElementTy = VectorTy->getElementType();
      Indices.push_back(IRB.getInt32(0));
    } else if (StructType *STy = dyn_cast<StructType>(ElementTy)) {
      if (STy->element_begin() == STy->element_end())
      ElementTy = *STy->element_begin();
      Indices.push_back(IRB.getInt32(0));
  } while (ElementTy != TargetTy);
  if (ElementTy != TargetTy)
    Indices.erase(Indices.end() - NumLayers, Indices.end());

  return buildGEP(IRB, BasePtr, Indices, NamePrefix);
                                      const Twine &NamePrefix) {
  if (VectorType *VecTy = dyn_cast<VectorType>(Ty)) {
    unsigned ElementSizeInBits =
        DL.getTypeSizeInBits(VecTy->getScalarType()).getFixedSize();
    if (ElementSizeInBits % 8 != 0) {
    APInt ElementSize(Offset.getBitWidth(), ElementSizeInBits / 8);
    APInt NumSkippedElements = Offset.sdiv(ElementSize);
    if (NumSkippedElements.ugt(cast<FixedVectorType>(VecTy)->getNumElements()))
    Offset -= NumSkippedElements * ElementSize;
    Indices.push_back(IRB.getInt(NumSkippedElements));
                                    Offset, TargetTy, Indices, NamePrefix);

  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    Type *ElementTy = ArrTy->getElementType();
                       DL.getTypeAllocSize(ElementTy).getFixedSize());
    APInt NumSkippedElements = Offset.sdiv(ElementSize);
    if (NumSkippedElements.ugt(ArrTy->getNumElements()))
    Offset -= NumSkippedElements * ElementSize;
    Indices.push_back(IRB.getInt(NumSkippedElements));
                                    Indices, NamePrefix);

  uint64_t StructOffset = Offset.getZExtValue();

  if (Offset.uge(DL.getTypeAllocSize(ElementTy).getFixedSize()))
  Indices.push_back(IRB.getInt32(Index));
                                    Indices, NamePrefix);

                                     const Twine &NamePrefix) {
  if (isa<ScalableVectorType>(ElementTy))
                     DL.getTypeAllocSize(ElementTy).getFixedSize());
  if (ElementSize == 0)
  APInt NumSkippedElements = Offset.sdiv(ElementSize);

  Offset -= NumSkippedElements * ElementSize;
  Indices.push_back(IRB.getInt(NumSkippedElements));
                                    Indices, NamePrefix);
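// getAdjustedPtr() computes a pointer Offset bytes into Ptr with the desired
// type: it strips constant GEPs and aliases looking for a natural GEP first,
// and only if that fails falls back to an i8* "raw" GEP plus a cast.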
                              const Twine &NamePrefix) {
  Value *OffsetPtr = nullptr;
  Value *OffsetBasePtr;

  Value *Int8Ptr = nullptr;

  auto AS = cast<PointerType>(Ptr->getType())->getAddressSpace();

      if (!GEP->accumulateConstantOffset(DL, GEPOffset))

      Ptr = GEP->getPointerOperand();
      if (!Visited.insert(Ptr).second)

                                              Indices, NamePrefix)) {
      if (OffsetPtr && OffsetPtr != OffsetBasePtr)
        if (Instruction *I = dyn_cast<Instruction>(OffsetPtr)) {
          assert(I->use_empty() && "Built a GEP with uses some how!");
          I->eraseFromParent();
      OffsetBasePtr = Ptr;

      Ptr = cast<Operator>(Ptr)->getOperand(0);
    } else if (GlobalAlias *GA = dyn_cast<GlobalAlias>(Ptr)) {
      if (GA->isInterposable())
      Ptr = GA->getAliasee();
  } while (Visited.insert(Ptr).second);

      Int8Ptr = IRB.CreateBitCast(
          Ptr, IRB.getInt8PtrTy(PointerTy->getPointerAddressSpace()),
          NamePrefix + "sroa_raw_cast");

    OffsetPtr = Int8PtrOffset == 0
                    : IRB.CreateInBoundsGEP(IRB.getInt8Ty(), Int8Ptr,
                                            IRB.getInt(Int8PtrOffset),
                                            NamePrefix + "sroa_raw_idx");

  if (cast<PointerType>(Ptr->getType()) != TargetPtrTy) {
    Ptr = IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
                                                  NamePrefix + "sroa_cast");
  if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
           "We can't have the same bitwidth for different int types");

  if (DL.getTypeSizeInBits(NewTy).getFixedSize() !=
      DL.getTypeSizeInBits(OldTy).getFixedSize())

    return OldAS == NewAS ||
           (!DL.isNonIntegralAddressSpace(OldAS) &&
            !DL.isNonIntegralAddressSpace(NewAS) &&
            DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

      return !DL.isNonIntegralPointerType(NewTy);

    if (!DL.isNonIntegralPointerType(OldTy))

  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
         "Integer types must be the exact same to convert.");

      return IRB.CreateIntToPtr(IRB.CreateBitCast(V, DL.getIntPtrType(NewTy)),

      return IRB.CreateBitCast(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

  return IRB.CreateBitCast(V, NewTy);
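// isVectorPromotionViable() looks for a vector type covering the whole
// partition such that every slice touches a whole-element, in-bounds
// subrange: each slice must begin and end on element boundaries within the
// partition.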
                                            uint64_t ElementSize,
  uint64_t BeginOffset =
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
      BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
  uint64_t EndOffset =
      std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||
      EndIndex > cast<FixedVectorType>(Ty)->getNumElements())

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
          Type::getIntNTy(Ty->getContext(), NumElements * ElementSize * 8);

  Use *U = S.getUse();

    if (MI->isVolatile())
    if (!S.isSplittable())
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    if (SI->isVolatile())
    Type *STy = SI->getValueOperand()->getType();
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
  Type *CommonEltTy = nullptr;
  bool HaveCommonEltTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      if (!CandidateTys.empty()) {
        if (DL.getTypeSizeInBits(VTy).getFixedSize() !=
            DL.getTypeSizeInBits(V).getFixedSize()) {
          CandidateTys.clear();
      CandidateTys.push_back(VTy);
        CommonEltTy = VTy->getElementType();
      else if (CommonEltTy != VTy->getElementType())
        HaveCommonEltTy = false;

  for (const Slice &S : P)
    if (S.beginOffset() == P.beginOffset() &&
        S.endOffset() == P.endOffset()) {
      if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
        CheckCandidateType(LI->getType());
      else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
        CheckCandidateType(SI->getValueOperand()->getType());

  if (CandidateTys.empty())

  if (!HaveCommonEltTy) {

    if (CandidateTys.empty())

      assert(DL.getTypeSizeInBits(RHSTy).getFixedSize() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedSize() &&
             "Cannot have vector types of different sizes!");
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      return cast<FixedVectorType>(RHSTy)->getNumElements() <
             cast<FixedVectorType>(LHSTy)->getNumElements();

        std::unique(CandidateTys.begin(), CandidateTys.end(), RankVectorTypes),
        CandidateTys.end());

      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    CandidateTys.resize(1);

  auto CheckVectorTypeForPromotion = [&](VectorType *VTy) {
    uint64_t ElementSize =
        DL.getTypeSizeInBits(VTy->getElementType()).getFixedSize();

    if (ElementSize % 8)
    assert((DL.getTypeSizeInBits(VTy).getFixedSize() % 8) == 0 &&
           "vector size not a multiple of element size?");

    for (const Slice &S : P)

    for (const Slice *S : P.splitSliceTails())

    if (CheckVectorTypeForPromotion(VTy))
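// isIntegerWideningViable() checks whether the partition can be treated as a
// single wide integer: every slice must be a load, store, or splittable
// intrinsic that stays in bounds, and (in the common case) at least one load
// or store must cover the entire alloca so the wide value is fully defined.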
                                            uint64_t AllocBeginOffset,
                                            bool &WholeAllocaOp) {
  uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedSize();

  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (DL.getTypeStoreSize(LI->getType()).getFixedSize() > Size)
    if (S.beginOffset() < AllocBeginOffset)
    if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize())
    } else if (RelBegin != 0 || RelEnd != Size ||
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())
    if (DL.getTypeStoreSize(ValueTy).getFixedSize() > Size)
    if (S.beginOffset() < AllocBeginOffset)
    if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
    if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedSize())
    } else if (RelBegin != 0 || RelEnd != Size ||
  } else if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(U->getUser())) {
    if (MI->isVolatile() || !isa<Constant>(MI->getLength()))
    if (!S.isSplittable())
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {

  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedSize();

  if (SizeInBits > IntegerType::MAX_INT_BITS)

  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedSize())

  Type *IntTy = Type::getIntNTy(AllocaTy->getContext(), SizeInBits);

  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)

  for (const Slice *S : P.splitSliceTails())

  return WholeAllocaOp;
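// extractInteger()/insertInteger() move a narrow integer in or out of the
// alloca-wide integer with a shift (byte offset adjusted for endianness), a
// trunc/zext, and, on insertion, a mask-and-or with the old value.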
             DL.getTypeStoreSize(IntTy).getFixedSize() &&
         "Element extends past full value");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() -
                 DL.getTypeStoreSize(Ty).getFixedSize() - Offset);
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");

         "Cannot extract to a larger integer!");
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

         "Cannot insert a larger integer!");
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
             DL.getTypeStoreSize(IntTy).getFixedSize() &&
         "Element store outside of alloca store");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedSize() -
                 DL.getTypeStoreSize(Ty).getFixedSize() - Offset);
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
  V = IRB.CreateOr(Old, V, Name + ".insert");
  auto *VecTy = cast<FixedVectorType>(V->getType());
  unsigned NumElements = EndIndex - BeginIndex;
  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

  if (NumElements == VecTy->getNumElements())

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  Mask.reserve(NumElements);
  for (unsigned i = BeginIndex; i != EndIndex; ++i)
  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {
  assert(VecTy && "Can only insert a vector into a vector");

    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

  assert(cast<FixedVectorType>(Ty)->getNumElements() <=
             cast<FixedVectorType>(VecTy)->getNumElements() &&
         "Too many elements!");
  if (cast<FixedVectorType>(Ty)->getNumElements() ==
      cast<FixedVectorType>(VecTy)->getNumElements()) {

  unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();

  Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

  Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
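// AllocaSliceRewriter visits every use in one partition and rewrites it to
// target the new, smaller alloca, tracking the slice's offsets relative to
// the new alloca and whether the whole partition can still be promoted.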
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  uint64_t ElementSize;

  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
              ? Type::getIntNTy(NewAI.getContext(),
                                DL.getTypeSizeInBits(NewAI.getAllocatedType())
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
      assert((DL.getTypeSizeInBits(ElementTy).getFixedSize() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
    bool CanSROA = true;
      BeginOffset = I->beginOffset();
      EndOffset = I->endOffset();
      IsSplittable = I->isSplittable();
          BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
      LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

      assert(BeginOffset < NewAllocaEndOffset);
      assert(EndOffset > NewAllocaBeginOffset);
      NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
      NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

      SliceSize = NewEndOffset - NewBeginOffset;

      OldUse = I->getUse();
      OldPtr = cast<Instruction>(OldUse->get());

      Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
      IRB.SetInsertPoint(OldUserI);
      IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
      IRB.getInserter().SetNamePrefix(

      CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
    assert(IsSplit || BeginOffset == NewBeginOffset);
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    size_t LastSROAPrefix = OldName.rfind(".sroa.");
    if (LastSROAPrefix != StringRef::npos) {
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
      if (IndexEnd != StringRef::npos && OldName[IndexEnd] == '.') {
        OldName = OldName.substr(IndexEnd + 1);
        if (OffsetEnd != StringRef::npos && OldName[OffsetEnd] == '.')
          OldName = OldName.substr(OffsetEnd + 1);

    OldName = OldName.substr(0, OldName.find(".sroa_"));

                                  Twine(OldName) + "."
  Align getSliceAlign() {
                          NewBeginOffset - NewAllocaBeginOffset);

  unsigned getIndex(uint64_t Offset) {
    assert(VecTy && "Can only call getIndex when rewriting a vector");
    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");

  void deleteIfTriviallyDead(Value *V) {
      Pass.DeadInsts.push_back(I);
  Value *rewriteVectorizedLoadInst() {
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");

    assert(IntTy && "We cannot insert an integer to the alloca");

    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {

    assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
           "Can only handle an extract for an overly wide load");
    if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
      V = IRB.CreateZExt(V, LI.getType());

    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
    const bool IsLoadPastEnd =
        DL.getTypeStoreSize(TargetTy).getFixedSize() > SliceSize;
    bool IsPtrAdjusted = false;
      V = rewriteVectorizedLoadInst();
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&

        if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
          if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
            if (AITy->getBitWidth() < TITy->getBitWidth()) {
              V = IRB.CreateZExt(V, TITy, "load.ext");
              if (DL.isBigEndian())
                V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),

      IsPtrAdjusted = true;

             "Only integer type loads and stores are split");
      assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedSize() &&
             "Split load isn't smaller than original load");
             "Non-byte-multiple bit width");

      V = insertInteger(DL, IRB, Placeholder, V, NewBeginOffset - BeginOffset,

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");
    unsigned NumElements = EndIndex - BeginIndex;
    assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
           "Too many elements!");
    Type *SliceTy = (NumElements == 1)

      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
    Pass.DeadInsts.push_back(&SI);

    assert(IntTy && "We cannot extract an integer from the alloca");

    if (DL.getTypeSizeInBits(V->getType()).getFixedSize() !=

      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;

    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
    Pass.DeadInsts.push_back(&SI);
    Value *OldOp = SI.getOperand(1);

    SI.getAAMetadata(AATags);

    Value *V = SI.getValueOperand();

        Pass.PostPromotionWorklist.insert(AI);

    if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedSize()) {
             "Only integer type loads and stores are split");
             "Non-byte-multiple bit width");
      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);

      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
      return rewriteIntegerStore(V, SI, AATags);

    const bool IsStorePastEnd =
        DL.getTypeStoreSize(V->getType()).getFixedSize() > SliceSize;
    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&

        if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
          if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
            if (VITy->getBitWidth() > AITy->getBitWidth()) {
              if (DL.isBigEndian())
                V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
              V = IRB.CreateTrunc(V, AITy, "load.trunc");

          IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign(), SI.isVolatile());

      unsigned AS = SI.getPointerAddressSpace();

          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());

    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

    if (SI.isVolatile())

    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);
    assert(Size > 0 && "Expected a positive number of bytes.");

          IRB.CreateZExt(V, SplatIntTy, "zext"),
          ConstantExpr::getUDiv(
              Constant::getAllOnesValue(SplatIntTy),
              ConstantExpr::getZExt(Constant::getAllOnesValue(V->getType()),

    V = IRB.CreateVectorSplat(NumElements, V, "vsplat");

    assert(NewBeginOffset == BeginOffset);

    deleteIfTriviallyDead(OldPtr);
    Pass.DeadInsts.push_back(&II);

    const bool CanContinue = [&]() {
      if (BeginOffset > NewAllocaBeginOffset ||
          EndOffset < NewAllocaEndOffset)
      if (C->getBitWidth() > 64)
      const auto Len = C->getZExtValue();
      auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
             DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedSize());

        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

      assert(ElementTy == ScalarTy);

      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
      assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
             "Too many elements!");

      Value *Splat = getIntegerSplat(
          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedSize() / 8);
      if (NumElements > 1)

      uint64_t Size = NewEndOffset - NewBeginOffset;

    if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                  EndOffset != NewAllocaBeginOffset)) {

      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
             "Wrong type for an alloca wide integer!");

      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);

                            DL.getTypeSizeInBits(ScalarTy).getFixedSize() / 8);
      if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
            V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());

      New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

      deleteIfTriviallyDead(OldPtr);

        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

    if (EmitMemCpy && &OldAI == &NewAI) {
      assert(NewBeginOffset == BeginOffset);

      if (NewEndOffset != EndOffset)
                                       NewEndOffset - NewBeginOffset));

      Pass.DeadInsts.push_back(&II);

      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);

    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

      Value *DestPtr, *SrcPtr;
        DestAlign = SliceAlign;
        SrcAlign = OtherAlign;
        DestAlign = OtherAlign;
        SrcAlign = SliceAlign;
      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;
    uint64_t Size = NewEndOffset - NewBeginOffset;
    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;
        IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
    } else if (IntTy && !IsWholeAlloca) {
      OtherTy = NewAllocaTy;

    Value *DstPtr = &NewAI;

    if (VecTy && !IsWholeAlloca && !IsDest) {
    } else if (IntTy && !IsWholeAlloca && !IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
      Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

    if (VecTy && !IsWholeAlloca && IsDest) {
    } else if (IntTy && !IsWholeAlloca && IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

           "Unexpected intrinsic!");

    Pass.DeadInsts.push_back(&II);
    if (NewBeginOffset != NewAllocaBeginOffset ||
        NewEndOffset != NewAllocaEndOffset)

                            NewEndOffset - NewBeginOffset);

      New = IRB.CreateLifetimeStart(Ptr, Size);
      New = IRB.CreateLifetimeEnd(Ptr, Size);
    Uses.push_back(&Root);
      if (LoadInst *LI = dyn_cast<LoadInst>(I)) {

        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));

      assert(isa<BitCastInst>(I) || isa<AddrSpaceCastInst>(I) ||
             isa<PHINode>(I) || isa<SelectInst>(I) ||
             isa<GetElementPtrInst>(I));
      for (User *U : I->users())
        if (Visited.insert(cast<Instruction>(U)).second)
          Uses.push_back(cast<Instruction>(U));
    } while (!Uses.empty());
  bool visitPHINode(PHINode &PN) {
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    if (isa<PHINode>(OldPtr))

      IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(PN);

    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(SI);
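// AggLoadStoreRewriter splits loads and stores of first-class aggregates
// (structs and arrays) into per-field operations, and folds GEPs through
// selects and PHIs, so the slice builder only ever sees scalar accesses.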
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  friend class InstVisitor<AggLoadStoreRewriter, bool>;

    bool Changed = false;
    while (!Queue.empty()) {
      U = Queue.pop_back_val();
      Changed |= visit(cast<Instruction>(U->getUser()));

    for (Use &U : I.uses())
        Queue.push_back(&U);

  bool visitInstruction(Instruction &I) { return false; }
  template <typename Derived> class OpSplitter {

        : IRB(InsertionPoint), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr),
          BaseTy(BaseTy), BaseAlign(BaseAlign), DL(DL) {}

        unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
        return static_cast<Derived *>(this)->emitFunc(

      if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          Indices.push_back(Idx);
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
          GEPIndices.pop_back();

      if (StructType *STy = dyn_cast<StructType>(Ty)) {
        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          Indices.push_back(Idx);
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
          GEPIndices.pop_back();
  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,

          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

          GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))

      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

    LoadOpSplitter Splitter(&LI, *U, LI.getType(), AATags,

  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,

      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

          GEPOperator::accumulateConstantOffset(BaseTy, GEPIndices, DL, Offset))

    if (!SI.isSimple() || SI.getPointerOperand() != *U)
    Value *V = SI.getValueOperand();

    SI.getAAMetadata(AATags);
    StoreOpSplitter Splitter(&SI, *U, V->getType(), AATags,
    SI.eraseFromParent();
3490 <<
"\n original: " << *Sel
3502 True->
getName() +
".sroa.gep")
3510 False->
getName() +
".sroa.gep")
3512 False->
getName() +
".sroa.gep");
3515 Sel->
getName() +
".sroa.sel");
3516 Visited.
erase(&GEPI);
3521 enqueueUsers(*NSelI);
3525 <<
"\n " << *NSel <<
'\n');
3538 { Instruction *I = dyn_cast<Instruction>(In);
3539 return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
3540 succ_empty(I->getParent()) ||
3541 !I->getParent()->isLegalToHoistInto();
3546 <<
"\n original: " << *PHI
3555 PHI->
getName() +
".sroa.phi");
3558 Value *NewVal =
nullptr;
3565 IRBuilderTy
B(
In->getParent(), std::next(
In->getIterator()));
3568 ?
B.CreateInBoundsGEP(Ty,
In,
Index,
In->getName() +
".sroa.gep")
3569 :
B.CreateGEP(Ty,
In,
Index,
In->getName() +
".sroa.gep");
3574 Visited.
erase(&GEPI);
3578 enqueueUsers(*NewPN);
3582 dbgs() <<
"\n " << *NewPN <<
'\n');
3589 foldGEPSelect(GEPI))
3600 bool visitPHINode(
PHINode &PN) {
  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedSize();
  uint64_t TypeSize = DL.getTypeSizeInBits(Ty).getFixedSize();

  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    InnerTy = ArrTy->getElementType();
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    InnerTy = STy->getElementType(Index);

  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedSize() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedSize())

  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedSize() == Size)
  if (Offset > DL.getTypeAllocSize(Ty).getFixedSize() ||
      (DL.getTypeAllocSize(Ty).getFixedSize() - Offset) < Size)

  if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
    uint64_t TyNumElements;
    if (auto *AT = dyn_cast<ArrayType>(Ty)) {
      ElementTy = AT->getElementType();
      TyNumElements = AT->getNumElements();
      auto *VT = cast<FixedVectorType>(Ty);
      ElementTy = VT->getElementType();
      TyNumElements = VT->getNumElements();
    uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize();
    uint64_t NumSkippedElements = Offset / ElementSize;
    if (NumSkippedElements >= TyNumElements)
    Offset -= NumSkippedElements * ElementSize;

    if (Size == ElementSize)

    uint64_t NumElements = Size / ElementSize;
    if (NumElements * ElementSize != Size)

  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedSize();
  if (Offset >= ElementSize)

  if (Size == ElementSize)

  if (Index == EndIndex)
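// presplitLoadsAndStores() pre-splits wide integer loads and stores whose
// slices straddle partition boundaries, when the store's operand is a load
// that would be split at the same offsets; each such access is replaced by a
// sequence of narrower accesses along those split points.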
  struct SplitOffsets {
    std::vector<uint64_t> Splits;

  LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
  for (Slice &S : P) {
    if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
      if (auto *LI = dyn_cast<LoadInst>(I))
        UnsplittableLoads.insert(LI);
      else if (auto *SI = dyn_cast<StoreInst>(I))
        if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
          UnsplittableLoads.insert(LI);

    assert(P.endOffset() > S.beginOffset() &&
           "Empty or backwards partition!");

    if (auto *LI = dyn_cast<LoadInst>(I)) {

      auto IsLoadSimplyStored = [](LoadInst *LI) {
          auto *SI = dyn_cast<StoreInst>(LU);
          if (!SI || !SI->isSimple())

      if (!IsLoadSimplyStored(LI)) {
        UnsplittableLoads.insert(LI);

      Loads.push_back(LI);
    } else if (auto *SI = dyn_cast<StoreInst>(I)) {
      if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))

      auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
      if (!StoredLoad || !StoredLoad->isSimple())
      assert(!SI->isVolatile() && "Cannot split volatile stores!");

      Stores.push_back(SI);

    auto &Offsets = SplitOffsetsMap[I];
           "Should not have splits the first time we see an instruction!");
    Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
  for (Slice *S : P.splitSliceTails()) {
    auto SplitOffsetsMapI =
        SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
    if (SplitOffsetsMapI == SplitOffsetsMap.end())
    auto &Offsets = SplitOffsetsMapI->second;

           "Cannot have an empty set of splits on the second partition!");
               P.beginOffset() - Offsets.S->beginOffset() &&
           "Previous split does not end where this one begins!");

    if (S->endOffset() > P.endOffset())

        auto *LI = cast<LoadInst>(SI->getValueOperand());

        if (UnsplittableLoads.count(LI))

        auto LoadOffsetsI = SplitOffsetsMap.find(LI);
        if (LoadOffsetsI == SplitOffsetsMap.end())
        auto &LoadOffsets = LoadOffsetsI->second;

        auto &StoreOffsets = SplitOffsetsMap[SI];

        if (LoadOffsets.Splits == StoreOffsets.Splits)

                          << " " << *LI << "\n"
                          << " " << *SI << "\n");

        UnsplittableLoads.insert(LI);

        auto *LI = cast<LoadInst>(SI->getValueOperand());
        return UnsplittableLoads.count(LI);

        return UnsplittableLoads.count(LI);
  if (Loads.empty() && Stores.empty())

  IRBuilderTy IRB(&AI);

    std::vector<LoadInst *> SplitLoads;

    assert(LoadSize > 0 && "Cannot have a zero-sized integer load!");

    auto &Offsets = SplitOffsetsMap[LI];
           "Slice size should always match load size exactly!");
    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + LoadSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    IRB.SetInsertPoint(LI);

    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
      auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
      auto *PartPtrTy = PartTy->getPointerTo(AS);
      LoadInst *PLoad = IRB.CreateAlignedLoad(
                             APInt(DL.getIndexSizeInBits(AS), PartOffset),
                             PartPtrTy, BasePtr->getName() + "."),

      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});

      SplitLoads.push_back(PLoad);

      NewSlices.push_back(
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
      LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
                        << ", " << NewSlices.back().endOffset()
                        << "): " << *PLoad << "\n");

      PartOffset = Offsets.Splits[Idx];
      PartSize = (Idx < Size ? Offsets.Splits[Idx] : LoadSize) - PartOffset;

    bool DeferredStores = false;
      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
        DeferredStores = true;

      Value *StoreBasePtr = SI->getPointerOperand();
      IRB.SetInsertPoint(SI);

      for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
        uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];

        auto AS = SI->getPointerAddressSpace();
        StoreInst *PStore = IRB.CreateAlignedStore(
                               APInt(DL.getIndexSizeInBits(AS), PartOffset),
                               PartPtrTy, StoreBasePtr->getName() + "."),

        PStore->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                   LLVMContext::MD_access_group});
        LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");

      if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
        ResplitPromotableAllocas.insert(OtherAI);
        Worklist.insert(OtherAI);
      } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
        Worklist.insert(OtherAI);

      DeadInsts.push_back(SI);

    DeadInsts.push_back(LI);
    auto *LI = cast<LoadInst>(SI->getValueOperand());

    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");

           "Slice size should always match load size exactly!");
    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());

    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
             "Too few split loads for the number of splits in the store!");

    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
      auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
      auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());

        PLoad = (*SplitLoads)[Idx];
        IRB.SetInsertPoint(LI);
        PLoad = IRB.CreateAlignedLoad(
                               APInt(DL.getIndexSizeInBits(AS), PartOffset),
                               LoadPartPtrTy, LoadBasePtr->getName() + "."),

      IRB.SetInsertPoint(SI);
      auto AS = SI->getPointerAddressSpace();
      StoreInst *PStore = IRB.CreateAlignedStore(
                             APInt(DL.getIndexSizeInBits(AS), PartOffset),
                             StorePartPtrTy, StoreBasePtr->getName() + "."),

      NewSlices.push_back(
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
      LLVM_DEBUG(dbgs() << " new slice [" << NewSlices.back().beginOffset()
                        << ", " << NewSlices.back().endOffset()
                        << "): " << *PStore << "\n");

      PartOffset = Offsets.Splits[Idx];
      PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;

    if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      ResplitPromotableAllocas.insert(OtherAI);
      Worklist.insert(OtherAI);
    } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      Worklist.insert(OtherAI);

    DeadInsts.push_back(LI);
    DeadInsts.push_back(SI);

    return ResplitPromotableAllocas.count(AI);
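// rewritePartition() picks the best type for one partition (a common use
// type, a partition of the original type, or an iN integer), creates a new
// alloca for it, and runs AllocaSliceRewriter over the partition's uses; if
// every use rewrites cleanly the new alloca is queued for promotion to SSA.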
  Type *SliceTy = nullptr;
  std::pair<Type *, IntegerType *> CommonUseTy =

  if (CommonUseTy.first)
    if (DL.getTypeAllocSize(CommonUseTy.first).getFixedSize() >= P.size())
      SliceTy = CommonUseTy.first;

                                                 P.beginOffset(), P.size()))
      SliceTy = TypePartitionTy;

  if (!SliceTy && CommonUseTy.second)
    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedSize() >= P.size())
      SliceTy = CommonUseTy.second;
  if ((!SliceTy || (SliceTy->isArrayTy() &&
      DL.isLegalInteger(P.size() * 8))
    SliceTy = Type::getIntNTy(*C, P.size() * 8);

  assert(DL.getTypeAllocSize(SliceTy).getFixedSize() >= P.size());

  const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
      IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,

                    << "[" << P.beginOffset() << "," << P.endOffset()
                    << ") to: " << *NewAI << "\n");

  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;

                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  for (Slice *S : P.splitSliceTails()) {

  for (Slice &S : P) {

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);

      SelectUsers.clear();

      SelectUsers.clear();

        auto *OldInst = dyn_cast<Instruction>(U->get());
        Value::dropDroppableUse(*U);
          DeadInsts.push_back(OldInst);

    if (PHIUsers.empty() && SelectUsers.empty()) {
      PromotableAllocas.push_back(NewAI);

      for (PHINode *PHIUser : PHIUsers)
        SpeculatablePHIs.insert(PHIUser);
        SpeculatableSelects.insert(SelectUser);
      Worklist.insert(NewAI);

    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();

    Worklist.insert(NewAI);
  unsigned NumPartitions = 0;
  bool Changed = false;

  Changed |= presplitLoadsAndStores(AI, AS);

  bool IsSorted = true;

  uint64_t AllocaSize =
  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {

      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);

    for (Slice &S : AS) {
      if (!S.isSplittable())

      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();

    for (Slice &S : AS) {
      if (!S.isSplittable())

      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();

  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {

      uint64_t SizeOfByte = 8;
      uint64_t AllocaSize =
      uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
      Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);
  auto *Expr = DbgDeclare->getExpression();

  uint64_t AllocaSize =
  for (auto Fragment : Fragments) {

    auto *FragmentExpr = Expr;
    if (Fragment.Size < AllocaSize || Expr->isFragment()) {

      auto ExprFragment = Expr->getFragmentInfo();
      uint64_t Offset = ExprFragment ? ExprFragment->OffsetInBits : 0;
      uint64_t Start = Offset + Fragment.Offset;
      uint64_t Size = Fragment.Size;
            ExprFragment->OffsetInBits + ExprFragment->SizeInBits;
        if (Start >= AbsEnd)

      if (auto OrigFragment = FragmentExpr->getFragmentInfo()) {
        assert(Start >= OrigFragment->OffsetInBits &&
               "new fragment is outside of original fragment");
        Start -= OrigFragment->OffsetInBits;

      auto VarSize = DbgDeclare->getVariable()->getSizeInBits();
        if (Size > *VarSize)
        if (Size == 0 || Start + Size > *VarSize)

      if (!VarSize || *VarSize != Size) {
                DIExpression::createFragmentExpression(Expr, Start, Size))

      return LHS->getVariable() == RHS->getVariable() &&
                 RHS->getDebugLoc()->getInlinedAt();

      if (SameVariableFragment(OldDII, DbgDeclare))
        OldDII->eraseFromParent();

    DIB.insertDeclare(Fragment.Alloca, DbgDeclare->getVariable(), FragmentExpr,
                      DbgDeclare->getDebugLoc(), &AI);
void SROA::clobberUse(Use &U) {

  if (Instruction *OldI = dyn_cast<Instruction>(OldV))
      DeadInsts.push_back(OldI);
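// runOnAlloca(): roughly, first rewrite aggregate loads/stores with
// AggRewriter, then build the AllocaSlices analysis, drop the dead users and
// operands it found, split the alloca partition by partition, and finally
// speculate any queued PHIs and selects.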
  ++NumAllocasAnalyzed;

      DL.getTypeAllocSize(AT).getFixedSize() == 0)

  bool Changed = false;

  AggLoadStoreRewriter AggRewriter(DL);
  Changed |= AggRewriter.rewrite(AI);

    for (Use &DeadOp : DeadUser->operands())

    DeadInsts.push_back(DeadUser);
  for (Use *DeadOp : AS.getDeadOperands()) {
    clobberUse(*DeadOp);

  if (AS.begin() == AS.end())

  Changed |= splitAlloca(AI, AS);

  while (!SpeculatablePHIs.empty())

  while (!SpeculatableSelects.empty())
bool SROA::deleteDeadInstructions(
  bool Changed = false;
  while (!DeadInsts.empty()) {
    Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());

        DeletedAllocas.insert(AI);
          OldDII->eraseFromParent();

    for (Use &Operand : I->operands())
      if (Instruction *U = dyn_cast<Instruction>(Operand)) {
          DeadInsts.push_back(U);

    I->eraseFromParent();
bool SROA::promoteAllocas(Function &F) {
  if (PromotableAllocas.empty())

  NumPromoted += PromotableAllocas.size();

  PromotableAllocas.clear();

  C = &F.getContext();

        PromotableAllocas.push_back(AI);
        Worklist.insert(AI);

  bool Changed = false;

    while (!Worklist.empty()) {
      Changed |= runOnAlloca(*Worklist.pop_back_val());
      Changed |= deleteDeadInstructions(DeletedAllocas);

      if (!DeletedAllocas.empty()) {
        auto IsInSet = [&](AllocaInst *AI) { return DeletedAllocas.count(AI); };
        Worklist.remove_if(IsInSet);
        PostPromotionWorklist.remove_if(IsInSet);
        DeletedAllocas.clear();

    Changed |= promoteAllocas(F);

    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());
  if (skipFunction(F))

  auto PA = Impl.runImpl(
      F, getAnalysis<DominatorTreeWrapperPass>().getDomTree(),
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F));

                    "Scalar Replacement Of Aggregates", false, false)