46#include "llvm/Config/llvm-config.h"
101#define DEBUG_TYPE "sroa"
103STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
104STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
105STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
106STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
107STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
108STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
109STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
110STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
112 "Number of loads rewritten into predicated loads to allow promotion");
115 "Number of stores rewritten into predicated loads to allow promotion");
117STATISTIC(NumVectorized,
"Number of vectorized aggregates");
124class AllocaSliceRewriter;
128class SelectHandSpeculativity {
129 unsigned char Storage = 0;
133 SelectHandSpeculativity() =
default;
134 SelectHandSpeculativity &setAsSpeculatable(
bool isTrueVal);
135 bool isSpeculatable(
bool isTrueVal)
const;
136 bool areAllSpeculatable()
const;
137 bool areAnySpeculatable()
const;
138 bool areNoneSpeculatable()
const;
// Expose the raw speculativity bits so the state can be round-tripped
// through an intptr_t (paired with the converting constructor taking
// intptr_t).
explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
// Reconstruct the speculativity state from raw bits produced by
// operator intptr_t().
explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
// The speculativity state must remain a single byte; callers rely on it
// packing into the same storage as an unsigned char.
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
145using PossiblySpeculatableLoad =
148using RewriteableMemOp =
149 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
201 std::vector<AllocaInst *> PromotableAllocas;
230 static std::optional<RewriteableMemOps>
231 isSafeSelectToSpeculate(
SelectInst &SI,
bool PreserveCFG);
236 :
C(
C), DTU(DTU), AC(AC),
240 std::pair<
bool ,
bool > runSROA(
Function &
F);
243 friend class AllocaSliceRewriter;
245 bool presplitLoadsAndStores(
AllocaInst &AI, AllocaSlices &AS);
247 bool splitAlloca(
AllocaInst &AI, AllocaSlices &AS);
248 std::pair<
bool ,
bool > runOnAlloca(
AllocaInst &AI);
249 void clobberUse(
Use &U);
265enum FragCalcResult { UseFrag, UseNoFrag,
Skip };
269 uint64_t NewStorageSliceOffsetInBits,
271 std::optional<DIExpression::FragmentInfo> StorageFragment,
272 std::optional<DIExpression::FragmentInfo> CurrentFragment,
276 if (StorageFragment) {
278 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
280 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
282 Target.SizeInBits = NewStorageSliceSizeInBits;
283 Target.OffsetInBits = NewStorageSliceOffsetInBits;
289 if (!CurrentFragment) {
293 if (
Target == CurrentFragment)
300 if (!CurrentFragment || *CurrentFragment ==
Target)
306 if (
Target.startInBits() < CurrentFragment->startInBits() ||
307 Target.endInBits() > CurrentFragment->endInBits())
357 if (MarkerRange.empty() && DVRAssignMarkerRange.empty())
363 LLVM_DEBUG(
dbgs() <<
" OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
365 LLVM_DEBUG(
dbgs() <<
" SliceSizeInBits: " << SliceSizeInBits <<
"\n");
377 DAI->getExpression()->getFragmentInfo();
380 DVR->getExpression()->getFragmentInfo();
390 auto MigrateDbgAssign = [&](
auto *DbgAssign) {
393 auto *Expr = DbgAssign->getExpression();
394 bool SetKillLocation =
false;
397 std::optional<DIExpression::FragmentInfo> BaseFragment;
400 if (R == BaseFragments.
end())
402 BaseFragment = R->second;
404 std::optional<DIExpression::FragmentInfo> CurrentFragment =
405 Expr->getFragmentInfo();
408 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
409 BaseFragment, CurrentFragment, NewFragment);
413 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
414 if (CurrentFragment) {
419 NewFragment.
OffsetInBits -= CurrentFragment->OffsetInBits;
430 DIExpression::get(Expr->getContext(), std::nullopt),
432 SetKillLocation =
true;
440 Inst->
setMetadata(LLVMContext::MD_DIAssignID, NewID);
447 DIExpression::get(Expr->getContext(), std::nullopt),
448 DbgAssign->getDebugLoc()),
462 Value && (DbgAssign->hasArgList() ||
463 !DbgAssign->getExpression()->isSingleLocationExpression());
465 NewAssign->setKillLocation();
480 NewAssign->moveBefore(DbgAssign);
482 NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
483 LLVM_DEBUG(
dbgs() <<
"Created new assign: " << *NewAssign <<
"\n");
486 for_each(MarkerRange, MigrateDbgAssign);
487 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
535 : BeginOffset(BeginOffset), EndOffset(EndOffset),
536 UseAndIsSplittable(
U, IsSplittable) {}
/// The start offset of this slice within the alloca, in bytes.
uint64_t beginOffset() const { return BeginOffset; }
/// The end offset (exclusive) of this slice within the alloca, in bytes.
uint64_t endOffset() const { return EndOffset; }
/// Whether this slice may be split into smaller slices; the flag is packed
/// into the int half of UseAndIsSplittable.
bool isSplittable() const { return UseAndIsSplittable.getInt(); }
/// Permanently mark this slice as unsplittable by clearing the packed flag.
void makeUnsplittable() { UseAndIsSplittable.setInt(false); }
/// The use of the alloca-derived pointer that produced this slice, or
/// nullptr once the slice has been killed (see kill()).
Use *getUse() const { return UseAndIsSplittable.getPointer(); }
/// A slice is dead when its use has been cleared via kill().
bool isDead() const { return getUse() == nullptr; }
/// Kill this slice: drop its use so it subsequently reads as dead.
void kill() { UseAndIsSplittable.setPointer(nullptr); }
556 if (beginOffset() <
RHS.beginOffset())
558 if (beginOffset() >
RHS.beginOffset())
560 if (isSplittable() !=
RHS.isSplittable())
561 return !isSplittable();
562 if (endOffset() >
RHS.endOffset())
570 return LHS.beginOffset() < RHSOffset;
574 return LHSOffset <
RHS.beginOffset();
578 return isSplittable() ==
RHS.isSplittable() &&
579 beginOffset() ==
RHS.beginOffset() && endOffset() ==
RHS.endOffset();
600 bool isEscaped()
const {
return PointerEscapingInstr; }
/// Iterator past the last slice.
iterator end() { return Slices.end(); }
/// Erase the slices in the half-open range [Start, Stop).
void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
626 int OldSize = Slices.size();
627 Slices.append(NewSlices.
begin(), NewSlices.
end());
628 auto SliceI = Slices.begin() + OldSize;
629 std::stable_sort(SliceI, Slices.end());
630 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
635 class partition_iterator;
643 return DeadUseIfPromotable;
654#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
666 template <
typename DerivedT,
typename RetT =
void>
class BuilderBase;
671#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
725 friend class AllocaSlices;
728 using iterator = AllocaSlices::iterator;
732 uint64_t BeginOffset = 0, EndOffset = 0;
742 Partition(iterator SI) :
SI(
SI), SJ(
SI) {}
/// The start offset of this partition within the alloca, in bytes.
uint64_t beginOffset() const { return BeginOffset; }
/// The end offset (exclusive) of this partition within the alloca, in bytes.
uint64_t endOffset() const { return EndOffset; }
759 assert(BeginOffset < EndOffset &&
"Partitions must span some bytes!");
760 return EndOffset - BeginOffset;
/// True when this partition contains no slices (SI and SJ coincide).
bool empty() const { return SI == SJ; }
/// Iterator to the first slice contained in this partition.
iterator begin() const { return SI; }
/// Iterator past the last slice contained in this partition.
iterator end() const { return SJ; }
809 AllocaSlices::iterator SE;
813 uint64_t MaxSplitSliceEndOffset = 0;
829 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
830 "Cannot advance past the end of the slices!");
833 if (!
P.SplitTails.empty()) {
834 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
836 P.SplitTails.clear();
837 MaxSplitSliceEndOffset = 0;
843 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
846 return S->endOffset() == MaxSplitSliceEndOffset;
848 "Could not find the current max split slice offset!");
851 return S->endOffset() <= MaxSplitSliceEndOffset;
853 "Max split slice end offset is not actually the max!");
860 assert(
P.SplitTails.empty() &&
"Failed to clear the split slices!");
870 if (S.isSplittable() && S.endOffset() >
P.EndOffset) {
871 P.SplitTails.push_back(&S);
872 MaxSplitSliceEndOffset =
873 std::max(S.endOffset(), MaxSplitSliceEndOffset);
881 P.BeginOffset =
P.EndOffset;
882 P.EndOffset = MaxSplitSliceEndOffset;
889 if (!
P.SplitTails.empty() &&
P.SI->beginOffset() !=
P.EndOffset &&
890 !
P.SI->isSplittable()) {
891 P.BeginOffset =
P.EndOffset;
892 P.EndOffset =
P.SI->beginOffset();
902 P.BeginOffset =
P.SplitTails.empty() ?
P.SI->beginOffset() :
P.EndOffset;
903 P.EndOffset =
P.SI->endOffset();
908 if (!
P.SI->isSplittable()) {
911 assert(
P.BeginOffset ==
P.SI->beginOffset());
915 while (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset) {
916 if (!
P.SJ->isSplittable())
917 P.EndOffset = std::max(
P.EndOffset,
P.SJ->endOffset());
929 assert(
P.SI->isSplittable() &&
"Forming a splittable partition!");
932 while (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset &&
933 P.SJ->isSplittable()) {
934 P.EndOffset = std::max(
P.EndOffset,
P.SJ->endOffset());
941 if (
P.SJ != SE &&
P.SJ->beginOffset() <
P.EndOffset) {
943 P.EndOffset =
P.SJ->beginOffset();
950 "End iterators don't match between compared partition iterators!");
957 if (
P.SI ==
RHS.P.SI &&
P.SplitTails.empty() ==
RHS.P.SplitTails.empty()) {
959 "Same set of slices formed two different sized partitions!");
960 assert(
P.SplitTails.size() ==
RHS.P.SplitTails.size() &&
961 "Same slice position with differently sized non-empty split "
984 return make_range(partition_iterator(begin(), end()),
985 partition_iterator(end(), end()));
992 if (
ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
993 return SI.getOperand(1 + CI->isZero());
994 if (SI.getOperand(1) == SI.getOperand(2))
995 return SI.getOperand(1);
1002 if (
PHINode *PN = dyn_cast<PHINode>(&
I)) {
1004 return PN->hasConstantValue();
1031 AllocSize(
DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
1036 if (VisitedDeadInsts.
insert(&
I).second)
1037 AS.DeadUsers.push_back(&
I);
1041 bool IsSplittable =
false) {
1047 <<
" which has zero size or starts outside of the "
1048 << AllocSize <<
" byte alloca:\n"
1049 <<
" alloca: " << AS.AI <<
"\n"
1050 <<
" use: " <<
I <<
"\n");
1051 return markAsDead(
I);
1063 assert(AllocSize >= BeginOffset);
1064 if (
Size > AllocSize - BeginOffset) {
1066 <<
Offset <<
" to remain within the " << AllocSize
1067 <<
" byte alloca:\n"
1068 <<
" alloca: " << AS.AI <<
"\n"
1069 <<
" use: " <<
I <<
"\n");
1070 EndOffset = AllocSize;
1073 AS.Slices.push_back(Slice(BeginOffset, EndOffset,
U, IsSplittable));
1078 return markAsDead(BC);
1085 return markAsDead(ASC);
1092 return markAsDead(GEPI);
1110 "All simple FCA loads should have been pre-split");
1116 if (
Size.isScalable())
1124 Value *ValOp =
SI.getValueOperand();
1145 <<
Offset <<
" which extends past the end of the "
1146 << AllocSize <<
" byte alloca:\n"
1147 <<
" alloca: " << AS.AI <<
"\n"
1148 <<
" use: " << SI <<
"\n");
1149 return markAsDead(SI);
1153 "All simple FCA stores should have been pre-split");
1158 assert(
II.getRawDest() == *
U &&
"Pointer use is not the destination?");
1163 return markAsDead(
II);
1178 return markAsDead(
II);
1182 if (VisitedDeadInsts.
count(&
II))
1195 MemTransferSliceMap.
find(&
II);
1196 if (MTPI != MemTransferSliceMap.
end())
1197 AS.Slices[MTPI->second].kill();
1198 return markAsDead(
II);
1206 if (*
U ==
II.getRawDest() && *
U ==
II.getRawSource()) {
1208 if (!
II.isVolatile())
1209 return markAsDead(
II);
1218 std::tie(MTPI, Inserted) =
1219 MemTransferSliceMap.
insert(std::make_pair(&
II, AS.Slices.size()));
1220 unsigned PrevIdx = MTPI->second;
1222 Slice &PrevP = AS.Slices[PrevIdx];
1226 if (!
II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1228 return markAsDead(
II);
1233 PrevP.makeUnsplittable();
1240 assert(AS.Slices[PrevIdx].getUse()->getUser() == &
II &&
1241 "Map index doesn't point back to a slice with this user.");
1249 if (
II.isDroppable()) {
1250 AS.DeadUseIfPromotable.push_back(
U);
1257 if (
II.isLifetimeStartOrEnd()) {
1260 Length->getLimitedValue());
1265 if (
II.isLaunderOrStripInvariantGroup()) {
1266 insertUse(
II,
Offset, AllocSize,
true);
1282 Uses.push_back(std::make_pair(cast<Instruction>(*
U), Root));
1289 std::tie(UsedI,
I) =
Uses.pop_back_val();
1291 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
1300 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
1314 if (!
GEP->hasAllZeroIndices())
1316 }
else if (!isa<BitCastInst>(
I) && !isa<PHINode>(
I) &&
1317 !isa<SelectInst>(
I) && !isa<AddrSpaceCastInst>(
I)) {
1321 for (
User *
U :
I->users())
1322 if (Visited.
insert(cast<Instruction>(
U)).second)
1323 Uses.push_back(std::make_pair(
I, cast<Instruction>(
U)));
1324 }
while (!
Uses.empty());
1330 assert(isa<PHINode>(
I) || isa<SelectInst>(
I));
1332 return markAsDead(
I);
1337 if (isa<PHINode>(
I) &&
1338 I.getParent()->getFirstInsertionPt() ==
I.getParent()->end())
1357 AS.DeadOperands.push_back(
U);
1380 AS.DeadOperands.push_back(
U);
// PHIs are handled together with selects; forward to the shared visitor.
void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
// Selects are handled together with PHIs; forward to the shared visitor.
void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
1397#
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
1400 PointerEscapingInstr(nullptr) {
1401 SliceBuilder
PB(
DL, AI, *
this);
1402 SliceBuilder::PtrInfo PtrI =
PB.visitPtr(AI);
1403 if (PtrI.isEscaped() || PtrI.isAborted()) {
1406 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1407 : PtrI.getAbortingInst();
1408 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
1412 llvm::erase_if(Slices, [](
const Slice &S) {
return S.isDead(); });
1419#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1423 printSlice(
OS,
I, Indent);
1425 printUse(
OS,
I, Indent);
1430 OS << Indent <<
"[" <<
I->beginOffset() <<
"," <<
I->endOffset() <<
")"
1431 <<
" slice #" << (
I -
begin())
1432 << (
I->isSplittable() ?
" (splittable)" :
"");
1437 OS << Indent <<
" used by: " << *
I->getUse()->getUser() <<
"\n";
1441 if (PointerEscapingInstr) {
1442 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1443 <<
" A pointer to this alloca escaped by:\n"
1444 <<
" " << *PointerEscapingInstr <<
"\n";
1448 OS <<
"Slices of alloca: " << AI <<
"\n";
1462static std::pair<Type *, IntegerType *>
1466 bool TyIsCommon =
true;
1471 for (AllocaSlices::const_iterator
I =
B;
I !=
E; ++
I) {
1472 Use *U =
I->getUse();
1473 if (isa<IntrinsicInst>(*U->getUser()))
1475 if (
I->beginOffset() !=
B->beginOffset() ||
I->endOffset() != EndOffset)
1478 Type *UserTy =
nullptr;
1479 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
1481 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
1482 UserTy = SI->getValueOperand()->getType();
1485 if (
IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
1490 if (UserITy->getBitWidth() % 8 != 0 ||
1491 UserITy->getBitWidth() / 8 > (EndOffset -
B->beginOffset()))
1496 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1502 if (!UserTy || (Ty && Ty != UserTy))
1508 return {TyIsCommon ? Ty :
nullptr, ITy};
1539 Type *LoadType =
nullptr;
1541 LoadInst *LI = dyn_cast<LoadInst>(U);
1552 if (LoadType != LI->
getType())
1561 if (BBI->mayWriteToMemory())
1564 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1571 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1608 IRB.SetInsertPoint(&PN);
1610 PN.
getName() +
".sroa.speculated");
1640 IRB.SetInsertPoint(TI);
1642 LoadInst *Load = IRB.CreateAlignedLoad(
1643 LoadTy, InVal, Alignment,
1645 ++NumLoadsSpeculated;
1647 Load->setAAMetadata(AATags);
1649 InjectedLoads[Pred] = Load;
1656SelectHandSpeculativity &
1657SelectHandSpeculativity::setAsSpeculatable(
bool isTrueVal) {
1659 Bitfield::set<SelectHandSpeculativity::TrueVal>(Storage,
true);
1661 Bitfield::set<SelectHandSpeculativity::FalseVal>(Storage,
true);
1665bool SelectHandSpeculativity::isSpeculatable(
bool isTrueVal)
const {
1666 return isTrueVal ? Bitfield::get<SelectHandSpeculativity::TrueVal>(Storage)
1670bool SelectHandSpeculativity::areAllSpeculatable()
const {
1671 return isSpeculatable(
true) &&
1672 isSpeculatable(
false);
1675bool SelectHandSpeculativity::areAnySpeculatable()
const {
1676 return isSpeculatable(
true) ||
1677 isSpeculatable(
false);
1679bool SelectHandSpeculativity::areNoneSpeculatable()
const {
1680 return !areAnySpeculatable();
1683static SelectHandSpeculativity
1686 SelectHandSpeculativity
Spec;
1689 for (
Value *
Value : {SI.getTrueValue(), SI.getFalseValue()})
1692 Spec.setAsSpeculatable(
Value == SI.getTrueValue());
1699std::optional<RewriteableMemOps>
1701 RewriteableMemOps Ops;
1703 for (
User *U :
SI.users()) {
1704 if (
auto *BC = dyn_cast<BitCastInst>(U); BC && BC->
hasOneUse())
1707 if (
auto *Store = dyn_cast<StoreInst>(U)) {
1713 Ops.emplace_back(Store);
1717 auto *LI = dyn_cast<LoadInst>(U);
1724 PossiblySpeculatableLoad
Load(LI);
1730 Ops.emplace_back(Load);
1734 SelectHandSpeculativity
Spec =
1740 Ops.emplace_back(Load);
1750 Value *TV = SI.getTrueValue();
1751 Value *FV = SI.getFalseValue();
1756 IRB.SetInsertPoint(&LI);
1760 LI.
getName() +
".sroa.speculate.load.true");
1763 LI.
getName() +
".sroa.speculate.load.false");
1764 NumLoadsSpeculated += 2;
1776 Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
1777 LI.
getName() +
".sroa.speculated");
1783template <
typename T>
1785 SelectHandSpeculativity
Spec,
1787 assert((isa<LoadInst>(
I) || isa<StoreInst>(
I)) &&
"Only for load and store!");
1792 if (
Spec.areNoneSpeculatable())
1794 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1797 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1799 if (
Spec.isSpeculatable(
true))
1807 if (isa<LoadInst>(
I))
1810 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1811 int SuccIdx = IsThen ? 0 : 1;
1812 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1813 auto &CondMemOp = cast<T>(*
I.clone());
1814 if (NewMemOpBB != Head) {
1815 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1816 if (isa<LoadInst>(
I))
1817 ++NumLoadsPredicated;
1819 ++NumStoresPredicated;
1821 CondMemOp.dropUBImplyingAttrsAndMetadata();
1822 ++NumLoadsSpeculated;
1824 CondMemOp.insertBefore(NewMemOpBB->getTerminator());
1825 Value *
Ptr = SI.getOperand(1 + SuccIdx);
1826 CondMemOp.setOperand(
I.getPointerOperandIndex(),
Ptr);
1827 if (isa<LoadInst>(
I)) {
1828 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1833 if (isa<LoadInst>(
I)) {
1836 I.replaceAllUsesWith(PN);
1841 SelectHandSpeculativity
Spec,
1843 if (
auto *LI = dyn_cast<LoadInst>(&
I))
1845 else if (
auto *SI = dyn_cast<StoreInst>(&
I))
1852 const RewriteableMemOps &Ops,
1854 bool CFGChanged =
false;
1857 for (
const RewriteableMemOp &
Op : Ops) {
1858 SelectHandSpeculativity
Spec;
1860 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&
Op)) {
1863 auto PSL = std::get<PossiblySpeculatableLoad>(
Op);
1864 I = PSL.getPointer();
1865 Spec = PSL.getInt();
1867 if (
Spec.areAllSpeculatable()) {
1870 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1874 I->eraseFromParent();
1878 cast<BitCastInst>(U)->eraseFromParent();
1879 SI.eraseFromParent();
1887 const Twine &NamePrefix) {
1889 Ptr = IRB.CreateInBoundsPtrAdd(
Ptr, IRB.getInt(
Offset),
1890 NamePrefix +
"sroa_idx");
1891 return IRB.CreatePointerBitCastOrAddrSpaceCast(
Ptr,
PointerTy,
1892 NamePrefix +
"sroa_cast");
1913 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1916 "We can't have the same bitwidth for different int types");
1920 if (
DL.getTypeSizeInBits(NewTy).getFixedValue() !=
1921 DL.getTypeSizeInBits(OldTy).getFixedValue())
1937 return OldAS == NewAS ||
1938 (!
DL.isNonIntegralAddressSpace(OldAS) &&
1939 !
DL.isNonIntegralAddressSpace(NewAS) &&
1940 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1946 return !
DL.isNonIntegralPointerType(NewTy);
1950 if (!
DL.isNonIntegralPointerType(OldTy))
1970 Type *OldTy = V->getType();
1976 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1977 "Integer types must be the exact same to convert.");
1985 return IRB.CreateIntToPtr(IRB.CreateBitCast(V,
DL.getIntPtrType(NewTy)),
1995 return IRB.CreateBitCast(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
2008 if (OldAS != NewAS) {
2009 assert(
DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
2010 return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
2015 return IRB.CreateBitCast(V, NewTy);
2028 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
2029 uint64_t BeginIndex = BeginOffset / ElementSize;
2030 if (BeginIndex * ElementSize != BeginOffset ||
2033 uint64_t EndOffset = std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
2034 uint64_t EndIndex = EndOffset / ElementSize;
2035 if (EndIndex * ElementSize != EndOffset ||
2039 assert(EndIndex > BeginIndex &&
"Empty vector!");
2040 uint64_t NumElements = EndIndex - BeginIndex;
2041 Type *SliceTy = (NumElements == 1)
2042 ? Ty->getElementType()
2048 Use *U = S.getUse();
2051 if (
MI->isVolatile())
2053 if (!S.isSplittable())
2055 }
else if (
IntrinsicInst *
II = dyn_cast<IntrinsicInst>(U->getUser())) {
2056 if (!
II->isLifetimeStartOrEnd() && !
II->isDroppable())
2058 }
else if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2065 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2071 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2072 if (SI->isVolatile())
2074 Type *STy = SI->getValueOperand()->getType();
2078 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2099 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2103 if (ElementSize % 8)
2105 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2106 "vector size not a multiple of element size?");
2109 for (
const Slice &S :
P)
2113 for (
const Slice *S :
P.splitSliceTails())
2127 bool HaveCommonEltTy,
Type *CommonEltTy,
2128 bool HaveVecPtrTy,
bool HaveCommonVecPtrTy,
2131 if (CandidateTys.
empty())
2138 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2142 if (!HaveCommonEltTy && HaveVecPtrTy) {
2144 CandidateTys.
clear();
2146 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2149 if (!VTy->getElementType()->isIntegerTy())
2150 VTy = cast<VectorType>(VTy->getWithNewType(IntegerType::getIntNTy(
2151 VTy->getContext(), VTy->getScalarSizeInBits())));
2158 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2159 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2160 "Cannot have vector types of different sizes!");
2161 assert(RHSTy->getElementType()->isIntegerTy() &&
2162 "All non-integer types eliminated!");
2163 assert(LHSTy->getElementType()->isIntegerTy() &&
2164 "All non-integer types eliminated!");
2165 return cast<FixedVectorType>(RHSTy)->getNumElements() <
2166 cast<FixedVectorType>(LHSTy)->getNumElements();
2170 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2171 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2172 "Cannot have vector types of different sizes!");
2173 assert(RHSTy->getElementType()->isIntegerTy() &&
2174 "All non-integer types eliminated!");
2175 assert(LHSTy->getElementType()->isIntegerTy() &&
2176 "All non-integer types eliminated!");
2177 return cast<FixedVectorType>(RHSTy)->getNumElements() ==
2178 cast<FixedVectorType>(LHSTy)->getNumElements();
2180 llvm::sort(CandidateTys, RankVectorTypesComp);
2181 CandidateTys.erase(
llvm::unique(CandidateTys, RankVectorTypesEq),
2182 CandidateTys.end());
2188 assert(VTy->getElementType() == CommonEltTy &&
2189 "Unaccounted for element type!");
2190 assert(VTy == CandidateTys[0] &&
2191 "Different vector types with the same element type!");
2194 CandidateTys.resize(1);
2200 return cast<FixedVectorType>(VTy)->getNumElements() >
2201 std::numeric_limits<unsigned short>::max();
2215 bool &HaveCommonEltTy,
Type *&CommonEltTy,
bool &HaveVecPtrTy,
2216 bool &HaveCommonVecPtrTy,
VectorType *&CommonVecPtrTy) {
2218 CandidateTysCopy.
size() ? CandidateTysCopy[0] :
nullptr;
2221 for (
Type *Ty : OtherTys) {
2222 if (!VectorType::isValidElementType(Ty))
2224 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2227 for (
VectorType *
const VTy : CandidateTysCopy) {
2229 assert(CandidateTysCopy[0] == OriginalElt &&
"Different Element");
2230 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2231 unsigned ElementSize =
2232 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2236 CheckCandidateType(NewVTy);
2242 CommonEltTy, HaveVecPtrTy,
2243 HaveCommonVecPtrTy, CommonVecPtrTy);
2261 Type *CommonEltTy =
nullptr;
2263 bool HaveVecPtrTy =
false;
2264 bool HaveCommonEltTy =
true;
2265 bool HaveCommonVecPtrTy =
true;
2266 auto CheckCandidateType = [&](
Type *Ty) {
2267 if (
auto *VTy = dyn_cast<VectorType>(Ty)) {
2269 if (!CandidateTys.
empty()) {
2271 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2272 DL.getTypeSizeInBits(V).getFixedValue()) {
2273 CandidateTys.
clear();
2278 Type *EltTy = VTy->getElementType();
2281 CommonEltTy = EltTy;
2282 else if (CommonEltTy != EltTy)
2283 HaveCommonEltTy =
false;
2286 HaveVecPtrTy =
true;
2287 if (!CommonVecPtrTy)
2288 CommonVecPtrTy = VTy;
2289 else if (CommonVecPtrTy != VTy)
2290 HaveCommonVecPtrTy =
false;
2296 for (
const Slice &S :
P) {
2298 if (
auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
2300 else if (
auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
2301 Ty = SI->getValueOperand()->getType();
2306 if (CandTy->isPointerTy() && (S.beginOffset() !=
P.beginOffset() ||
2307 S.endOffset() !=
P.endOffset())) {
2314 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2315 CheckCandidateType(Ty);
2320 LoadStoreTys, CandidateTysCopy, CheckCandidateType,
P,
DL,
2321 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2322 HaveCommonVecPtrTy, CommonVecPtrTy))
2325 CandidateTys.
clear();
2327 DeferredTys, CandidateTysCopy, CheckCandidateType,
P,
DL, CandidateTys,
2328 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2340 bool &WholeAllocaOp) {
2343 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2344 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2346 Use *U = S.getUse();
2353 if (
II->isLifetimeStartOrEnd() ||
II->isDroppable())
2362 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2366 if (
DL.getTypeStoreSize(LI->
getType()).getFixedValue() >
Size)
2370 if (S.beginOffset() < AllocBeginOffset)
2375 if (!isa<VectorType>(LI->
getType()) && RelBegin == 0 && RelEnd ==
Size)
2376 WholeAllocaOp =
true;
2378 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2380 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2386 }
else if (
StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
2387 Type *ValueTy = SI->getValueOperand()->getType();
2388 if (SI->isVolatile())
2391 if (
DL.getTypeStoreSize(ValueTy).getFixedValue() >
Size)
2395 if (S.beginOffset() < AllocBeginOffset)
2400 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd ==
Size)
2401 WholeAllocaOp =
true;
2402 if (
IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2403 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2405 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2411 }
else if (
MemIntrinsic *
MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2412 if (
MI->isVolatile() || !isa<Constant>(
MI->getLength()))
2414 if (!S.isSplittable())
2431 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2437 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2455 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2457 for (
const Slice &S :
P)
2462 for (
const Slice *S :
P.splitSliceTails())
2467 return WholeAllocaOp;
2474 IntegerType *IntTy = cast<IntegerType>(V->getType());
2476 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2477 "Element extends past full value");
2479 if (
DL.isBigEndian())
2480 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2481 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2483 V = IRB.CreateLShr(V, ShAmt,
Name +
".shift");
2487 "Cannot extract to a larger integer!");
2489 V = IRB.CreateTrunc(V, Ty,
Name +
".trunc");
2498 IntegerType *Ty = cast<IntegerType>(V->getType());
2500 "Cannot insert a larger integer!");
2503 V = IRB.CreateZExt(V, IntTy,
Name +
".ext");
2507 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2508 "Element store outside of alloca store");
2510 if (
DL.isBigEndian())
2511 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2512 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2514 V = IRB.CreateShl(V, ShAmt,
Name +
".shift");
2520 Old = IRB.CreateAnd(Old, Mask,
Name +
".mask");
2522 V = IRB.CreateOr(Old, V,
Name +
".insert");
2530 auto *VecTy = cast<FixedVectorType>(V->getType());
2531 unsigned NumElements = EndIndex - BeginIndex;
2534 if (NumElements == VecTy->getNumElements())
2537 if (NumElements == 1) {
2538 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2544 auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
2545 V = IRB.CreateShuffleVector(V, Mask,
Name +
".extract");
2551 unsigned BeginIndex,
const Twine &
Name) {
2553 assert(VecTy &&
"Can only insert a vector into a vector");
2555 VectorType *Ty = dyn_cast<VectorType>(V->getType());
2558 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2566 "Too many elements!");
2569 assert(V->getType() == VecTy &&
"Vector type mismatch");
2572 unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();
2580 for (
unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2581 if (i >= BeginIndex && i < EndIndex)
2582 Mask.push_back(i - BeginIndex);
2585 V = IRB.CreateShuffleVector(V, Mask,
Name +
".expand");
2590 for (
unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
2591 Mask2.
push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
2607class AllocaSliceRewriter :
public InstVisitor<AllocaSliceRewriter, bool> {
2617 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2646 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2649 bool IsSplittable =
false;
2650 bool IsSplit =
false;
2651 Use *OldUse =
nullptr;
2664 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
2668 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2669 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2676 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2680 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2681 NewAllocaBeginOffset(NewAllocaBeginOffset),
2682 NewAllocaEndOffset(NewAllocaEndOffset),
2683 NewAllocaTy(NewAI.getAllocatedType()),
2686 ?
Type::getIntNTy(NewAI.getContext(),
2687 DL.getTypeSizeInBits(NewAI.getAllocatedType())
2690 VecTy(PromotableVecTy),
2691 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2692 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2694 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2697 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2698 "Only multiple-of-8 sized vector elements are viable");
2701 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
2704 bool visit(AllocaSlices::const_iterator
I) {
2705 bool CanSROA =
true;
2706 BeginOffset =
I->beginOffset();
2707 EndOffset =
I->endOffset();
2708 IsSplittable =
I->isSplittable();
2710 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2711 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
2716 assert(BeginOffset < NewAllocaEndOffset);
2717 assert(EndOffset > NewAllocaBeginOffset);
2718 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2719 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
2721 SliceSize = NewEndOffset - NewBeginOffset;
2722 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2723 <<
") NewBegin:(" << NewBeginOffset <<
", "
2724 << NewEndOffset <<
") NewAllocaBegin:("
2725 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
2727 assert(IsSplit || NewBeginOffset == BeginOffset);
2728 OldUse =
I->getUse();
2729 OldPtr = cast<Instruction>(OldUse->get());
2731 Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
2732 IRB.SetInsertPoint(OldUserI);
2733 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
2734 IRB.getInserter().SetNamePrefix(
Twine(NewAI.
getName()) +
"." +
2735 Twine(BeginOffset) +
".");
2737 CanSROA &=
visit(cast<Instruction>(OldUse->getUser()));
2756 assert(IsSplit || BeginOffset == NewBeginOffset);
2762 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
2764 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
2769 OldName = OldName.
substr(IndexEnd + 1);
2773 OldName = OldName.
substr(OffsetEnd + 1);
2777 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
2784 Twine(OldName) +
"."
2796 Align getSliceAlign() {
2798 NewBeginOffset - NewAllocaBeginOffset);
2802 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
2804 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
2810 void deleteIfTriviallyDead(
Value *V) {
2813 Pass.DeadInsts.push_back(
I);
2817 unsigned BeginIndex = getIndex(NewBeginOffset);
2818 unsigned EndIndex = getIndex(NewEndOffset);
2819 assert(EndIndex > BeginIndex &&
"Empty vector!");
2824 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2825 LLVMContext::MD_access_group});
2826 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
2830 assert(IntTy &&
"We cannot insert an integer to the alloca");
2835 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
2837 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
2846 assert(cast<IntegerType>(LI.
getType())->getBitWidth() >= SliceSize * 8 &&
2847 "Can only handle an extract for an overly wide load");
2848 if (cast<IntegerType>(LI.
getType())->getBitWidth() > SliceSize * 8)
2849 V = IRB.CreateZExt(V, LI.
getType());
2864 const bool IsLoadPastEnd =
2865 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
2866 bool IsPtrAdjusted =
false;
2869 V = rewriteVectorizedLoadInst(LI);
2871 V = rewriteIntegerLoad(LI);
2872 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
2873 NewEndOffset == NewAllocaEndOffset &&
2895 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
2903 if (
auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
2904 if (
auto *TITy = dyn_cast<IntegerType>(TargetTy))
2905 if (AITy->getBitWidth() < TITy->getBitWidth()) {
2906 V = IRB.CreateZExt(V, TITy,
"load.ext");
2907 if (
DL.isBigEndian())
2908 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
2912 Type *LTy = IRB.getPtrTy(AS);
2914 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
2919 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
2923 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
2924 LLVMContext::MD_access_group});
2927 IsPtrAdjusted =
true;
2934 "Only integer type loads and stores are split");
2935 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
2936 "Split load isn't smaller than original load");
2938 "Non-byte-multiple bit width");
2944 LIIt.setHeadBit(
true);
2945 IRB.SetInsertPoint(LI.
getParent(), LIIt);
2950 Value *Placeholder =
2956 Placeholder->replaceAllUsesWith(&LI);
2957 Placeholder->deleteValue();
2962 Pass.DeadInsts.push_back(&LI);
2963 deleteIfTriviallyDead(OldOp);
2973 if (
V->getType() != VecTy) {
2974 unsigned BeginIndex = getIndex(NewBeginOffset);
2975 unsigned EndIndex = getIndex(NewEndOffset);
2976 assert(EndIndex > BeginIndex &&
"Empty vector!");
2977 unsigned NumElements = EndIndex - BeginIndex;
2979 "Too many elements!");
2980 Type *SliceTy = (NumElements == 1)
2983 if (
V->getType() != SliceTy)
2992 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
2993 LLVMContext::MD_access_group});
2997 Pass.DeadInsts.push_back(&SI);
3001 Store,
Store->getPointerOperand(), OrigV,
DL);
3007 assert(IntTy &&
"We cannot extract an integer from the alloca");
3009 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
3014 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3020 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3021 LLVMContext::MD_access_group});
3027 Store,
Store->getPointerOperand(),
3028 Store->getValueOperand(),
DL);
3030 Pass.DeadInsts.push_back(&SI);
3037 Value *OldOp =
SI.getOperand(1);
3045 if (
V->getType()->isPointerTy())
3046 if (
AllocaInst *AI = dyn_cast<AllocaInst>(
V->stripInBoundsOffsets()))
3047 Pass.PostPromotionWorklist.insert(AI);
3049 if (SliceSize <
DL.getTypeStoreSize(
V->getType()).getFixedValue()) {
3051 assert(
V->getType()->isIntegerTy() &&
3052 "Only integer type loads and stores are split");
3053 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
3054 "Non-byte-multiple bit width");
3061 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3062 if (IntTy &&
V->getType()->isIntegerTy())
3063 return rewriteIntegerStore(V, SI, AATags);
3066 if (NewBeginOffset == NewAllocaBeginOffset &&
3067 NewEndOffset == NewAllocaEndOffset &&
3071 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
3074 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
3076 unsigned AS =
SI.getPointerAddressSpace();
3077 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3079 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
3081 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3082 LLVMContext::MD_access_group});
3086 if (
SI.isVolatile())
3095 Pass.DeadInsts.push_back(&SI);
3096 deleteIfTriviallyDead(OldOp);
3114 assert(
Size > 0 &&
"Expected a positive number of bytes.");
3122 IRB.CreateZExt(V, SplatIntTy,
"zext"),
3132 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
3145 if (!isa<ConstantInt>(
II.getLength())) {
3147 assert(NewBeginOffset == BeginOffset);
3148 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->
getType()));
3149 II.setDestAlignment(getSliceAlign());
3155 "AT: Unexpected link to non-const GEP");
3156 deleteIfTriviallyDead(OldPtr);
3161 Pass.DeadInsts.push_back(&
II);
3166 const bool CanContinue = [&]() {
3169 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3172 auto *
C = cast<ConstantInt>(
II.getLength());
3174 if (Len > std::numeric_limits<unsigned>::max())
3176 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.
getContext());
3179 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3185 Type *SizeTy =
II.getLength()->getType();
3186 unsigned Sz = NewEndOffset - NewBeginOffset;
3189 getNewAllocaSlicePtr(IRB, OldPtr->
getType()),
II.getValue(),
Size,
3196 New,
New->getRawDest(),
nullptr,
DL);
3211 assert(ElementTy == ScalarTy);
3213 unsigned BeginIndex = getIndex(NewBeginOffset);
3214 unsigned EndIndex = getIndex(NewEndOffset);
3215 assert(EndIndex > BeginIndex &&
"Empty vector!");
3216 unsigned NumElements = EndIndex - BeginIndex;
3218 "Too many elements!");
3221 II.getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3223 if (NumElements > 1)
3235 V = getIntegerSplat(
II.getValue(),
Size);
3237 if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
3238 EndOffset != NewAllocaBeginOffset)) {
3245 assert(
V->getType() == IntTy &&
3246 "Wrong type for an alloca wide integer!");
3251 assert(NewBeginOffset == NewAllocaBeginOffset);
3252 assert(NewEndOffset == NewAllocaEndOffset);
3254 V = getIntegerSplat(
II.getValue(),
3255 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3256 if (
VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
3263 Value *NewPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3265 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
II.isVolatile());
3266 New->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3267 LLVMContext::MD_access_group});
3273 New,
New->getPointerOperand(), V,
DL);
3276 return !
II.isVolatile();
3287 bool IsDest = &
II.getRawDestUse() == OldUse;
3288 assert((IsDest &&
II.getRawDest() == OldPtr) ||
3289 (!IsDest &&
II.getRawSource() == OldPtr));
3291 Align SliceAlign = getSliceAlign();
3299 if (!IsSplittable) {
3300 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3303 auto UpdateAssignAddress = [&](
auto *DbgAssign) {
3305 DbgAssign->getAddress() ==
II.getDest())
3306 DbgAssign->replaceVariableLocationOp(
II.getDest(), AdjustedPtr);
3310 II.setDest(AdjustedPtr);
3311 II.setDestAlignment(SliceAlign);
3313 II.setSource(AdjustedPtr);
3314 II.setSourceAlignment(SliceAlign);
3318 deleteIfTriviallyDead(OldPtr);
3331 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3340 if (EmitMemCpy && &OldAI == &NewAI) {
3342 assert(NewBeginOffset == BeginOffset);
3345 if (NewEndOffset != EndOffset)
3346 II.setLength(ConstantInt::get(
II.getLength()->getType(),
3347 NewEndOffset - NewBeginOffset));
3351 Pass.DeadInsts.push_back(&
II);
3355 Value *OtherPtr = IsDest ?
II.getRawSource() :
II.getRawDest();
3358 assert(AI != &OldAI && AI != &NewAI &&
3359 "Splittable transfers cannot reach the same alloca on both ends.");
3360 Pass.Worklist.insert(AI);
3367 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3368 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3370 (IsDest ?
II.getSourceAlign() :
II.getDestAlign()).valueOrOne();
3372 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3380 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3381 Type *SizeTy =
II.getLength()->getType();
3382 Constant *
Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3384 Value *DestPtr, *SrcPtr;
3389 DestAlign = SliceAlign;
3391 SrcAlign = OtherAlign;
3394 DestAlign = OtherAlign;
3396 SrcAlign = SliceAlign;
3398 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3401 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3406 &
II, New, DestPtr,
nullptr,
DL);
3411 SliceSize * 8, &
II, New, DestPtr,
nullptr,
DL);
3417 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3418 NewEndOffset == NewAllocaEndOffset;
3420 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3421 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3422 unsigned NumElements = EndIndex - BeginIndex;
3429 if (VecTy && !IsWholeAlloca) {
3430 if (NumElements == 1)
3431 OtherTy = VecTy->getElementType();
3434 }
else if (IntTy && !IsWholeAlloca) {
3437 OtherTy = NewAllocaTy;
3451 DstPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3455 SrcPtr = getPtrToNewAI(
II.getSourceAddressSpace(),
II.isVolatile());
3459 if (VecTy && !IsWholeAlloca && !IsDest) {
3463 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3470 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3471 II.isVolatile(),
"copyload");
3472 Load->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3473 LLVMContext::MD_access_group});
3480 if (VecTy && !IsWholeAlloca && IsDest) {
3484 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3494 IRB.CreateAlignedStore(Src, DstPtr, DstAlign,
II.isVolatile()));
3495 Store->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3496 LLVMContext::MD_access_group});
3499 Src->getType(),
DL));
3505 Store, DstPtr, Src,
DL);
3510 &
II, Store, DstPtr, Src,
DL);
3514 return !
II.isVolatile();
3518 assert((
II.isLifetimeStartOrEnd() ||
II.isLaunderOrStripInvariantGroup() ||
3519 II.isDroppable()) &&
3520 "Unexpected intrinsic!");
3524 Pass.DeadInsts.push_back(&
II);
3526 if (
II.isDroppable()) {
3527 assert(
II.getIntrinsicID() == Intrinsic::assume &&
"Expected assume");
3533 if (
II.isLaunderOrStripInvariantGroup())
3536 assert(
II.getArgOperand(1) == OldPtr);
3544 if (NewBeginOffset != NewAllocaBeginOffset ||
3545 NewEndOffset != NewAllocaEndOffset)
3549 ConstantInt::get(cast<IntegerType>(
II.getArgOperand(0)->getType()),
3550 NewEndOffset - NewBeginOffset);
3556 if (
II.getIntrinsicID() == Intrinsic::lifetime_start)
3574 Uses.push_back(&Root);
3578 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
3582 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
3583 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
3587 assert(isa<BitCastInst>(
I) || isa<AddrSpaceCastInst>(
I) ||
3588 isa<PHINode>(
I) || isa<SelectInst>(
I) ||
3589 isa<GetElementPtrInst>(
I));
3590 for (
User *U :
I->users())
3591 if (Visited.
insert(cast<Instruction>(U)).second)
3592 Uses.push_back(cast<Instruction>(U));
3593 }
while (!
Uses.empty());
3596 bool visitPHINode(
PHINode &PN) {
3598 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
3599 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
3606 if (isa<PHINode>(OldPtr))
3608 OldPtr->
getParent()->getFirstInsertionPt());
3610 IRB.SetInsertPoint(OldPtr);
3611 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
3613 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3615 std::replace(PN.
op_begin(), PN.
op_end(), cast<Value>(OldPtr), NewPtr);
3618 deleteIfTriviallyDead(OldPtr);
3621 fixLoadStoreAlign(PN);
3632 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
3633 "Pointer isn't an operand!");
3634 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
3635 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
3637 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3639 if (
SI.getOperand(1) == OldPtr)
3640 SI.setOperand(1, NewPtr);
3641 if (
SI.getOperand(2) == OldPtr)
3642 SI.setOperand(2, NewPtr);
3645 deleteIfTriviallyDead(OldPtr);
3648 fixLoadStoreAlign(SI);
3663class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
3683 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
3684 :
DL(
DL), IRB(IRB) {}
3691 bool Changed =
false;
3692 while (!
Queue.empty()) {
3693 U =
Queue.pop_back_val();
3694 Changed |=
visit(cast<Instruction>(
U->getUser()));
3703 for (
Use &U :
I.uses())
3704 if (Visited.
insert(
U.getUser()).second)
3705 Queue.push_back(&U);
3709 bool visitInstruction(
Instruction &
I) {
return false; }
3712 template <
typename Derived>
class OpSplitter {
3744 BaseAlign(BaseAlign),
DL(
DL) {
3745 IRB.SetInsertPoint(InsertionPoint);
3765 return static_cast<Derived *
>(
this)->emitFunc(
3769 if (
ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
3770 unsigned OldSize = Indices.
size();
3772 for (
unsigned Idx = 0,
Size = ATy->getNumElements();
Idx !=
Size;
3774 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
3777 emitSplitOps(ATy->getElementType(), Agg,
Name +
"." +
Twine(
Idx));
3784 if (
StructType *STy = dyn_cast<StructType>(Ty)) {
3785 unsigned OldSize = Indices.
size();
3787 for (
unsigned Idx = 0,
Size = STy->getNumElements();
Idx !=
Size;
3789 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
3803 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
3809 : OpSplitter<LoadOpSplitter>(InsertionPoint,
Ptr,
BaseTy, BaseAlign,
DL,
3819 IRB.CreateInBoundsGEP(
BaseTy,
Ptr, GEPIndices,
Name +
".gep");
3821 IRB.CreateAlignedLoad(Ty,
GEP, Alignment,
Name +
".load");
3824 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
3827 Load->setAAMetadata(
3830 Agg = IRB.CreateInsertValue(Agg, Load, Indices,
Name +
".insert");
3852 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
3856 : OpSplitter<StoreOpSplitter>(InsertionPoint,
Ptr,
BaseTy, BaseAlign,
3858 AATags(AATags), AggStore(AggStore) {}
3869 Value *ExtractValue =
3870 IRB.CreateExtractValue(Agg, Indices,
Name +
".extract");
3871 Value *InBoundsGEP =
3872 IRB.CreateInBoundsGEP(
BaseTy,
Ptr, GEPIndices,
Name +
".gep");
3874 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
3877 DL.getIndexSizeInBits(
Ptr->getType()->getPointerAddressSpace()), 0);
3889 if (
auto *OldAI = dyn_cast<AllocaInst>(
Base)) {
3891 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
3893 SizeInBits, AggStore, Store,
3894 Store->getPointerOperand(),
Store->getValueOperand(),
3899 "AT: unexpected debug.assign linked to store through "
3907 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
3910 if (
V->getType()->isSingleValueType())
3915 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
3917 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
3922 SI.eraseFromParent();
3945 if (
auto *SI = dyn_cast<SelectInst>(
Op)) {
3956 if (!isa<ConstantInt>(
Op))
3964 dbgs() <<
" original: " << *Sel <<
"\n";
3965 dbgs() <<
" " << GEPI <<
"\n";);
3967 auto GetNewOps = [&](
Value *SelOp) {
3977 Value *True = Sel->getTrueValue();
3978 Value *False = Sel->getFalseValue();
3982 IRB.SetInsertPoint(&GEPI);
3986 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0],
ArrayRef(TrueOps).drop_front(),
3987 True->
getName() +
".sroa.gep", NW);
3990 IRB.CreateGEP(Ty, FalseOps[0],
ArrayRef(FalseOps).drop_front(),
3991 False->
getName() +
".sroa.gep", NW);
3993 Value *NSel = IRB.CreateSelect(Sel->getCondition(), NTrue, NFalse,
3994 Sel->getName() +
".sroa.sel");
3995 Visited.
erase(&GEPI);
4000 enqueueUsers(*NSelI);
4003 dbgs() <<
" " << *NFalse <<
"\n";
4004 dbgs() <<
" " << *NSel <<
"\n";);
4018 auto IsInvalidPointerOperand = [](
Value *
V) {
4019 if (!isa<Instruction>(V))
4021 if (
auto *AI = dyn_cast<AllocaInst>(V))
4022 return !AI->isStaticAlloca();
4026 if (
any_of(
Phi->operands(), IsInvalidPointerOperand))
4035 if (
auto *SI = dyn_cast<PHINode>(
Op)) {
4041 [](
Value *V) { return isa<ConstantInt>(V); }))
4046 if (!isa<ConstantInt>(
Op))
4054 dbgs() <<
" original: " << *
Phi <<
"\n";
4055 dbgs() <<
" " << GEPI <<
"\n";);
4057 auto GetNewOps = [&](
Value *PhiOp) {
4067 IRB.SetInsertPoint(Phi);
4069 Phi->getName() +
".sroa.phi");
4075 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
4084 IRB.CreateGEP(SourceTy, NewOps[0],
ArrayRef(NewOps).drop_front(),
4090 Visited.
erase(&GEPI);
4094 enqueueUsers(*NewPhi);
4100 dbgs() <<
"\n " << *NewPhi <<
'\n');
4106 if (unfoldGEPSelect(GEPI))
4109 if (unfoldGEPPhi(GEPI))
4116 bool visitPHINode(
PHINode &PN) {
4138 uint64_t AllocSize =
DL.getTypeAllocSize(Ty).getFixedValue();
4142 if (
ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
4143 InnerTy = ArrTy->getElementType();
4144 }
else if (
StructType *STy = dyn_cast<StructType>(Ty)) {
4147 InnerTy = STy->getElementType(
Index);
4152 if (AllocSize >
DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4153 TypeSize >
DL.getTypeSizeInBits(InnerTy).getFixedValue())
4174 if (
Offset == 0 &&
DL.getTypeAllocSize(Ty).getFixedValue() ==
Size)
4176 if (
Offset >
DL.getTypeAllocSize(Ty).getFixedValue() ||
4177 (
DL.getTypeAllocSize(Ty).getFixedValue() -
Offset) <
Size)
4180 if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
4183 if (
auto *AT = dyn_cast<ArrayType>(Ty)) {
4184 ElementTy = AT->getElementType();
4185 TyNumElements = AT->getNumElements();
4189 auto *VT = cast<FixedVectorType>(Ty);
4190 ElementTy = VT->getElementType();
4191 TyNumElements = VT->getNumElements();
4193 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4195 if (NumSkippedElements >= TyNumElements)
4197 Offset -= NumSkippedElements * ElementSize;
4209 if (
Size == ElementSize)
4213 if (NumElements * ElementSize !=
Size)
4215 return ArrayType::get(ElementTy, NumElements);
4237 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4238 if (
Offset >= ElementSize)
4249 if (
Size == ElementSize)
4256 if (
Index == EndIndex)
4305bool SROA::presplitLoadsAndStores(
AllocaInst &AI, AllocaSlices &AS) {
4319 struct SplitOffsets {
4321 std::vector<uint64_t> Splits;
4338 LLVM_DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
4339 for (
auto &
P : AS.partitions()) {
4340 for (Slice &S :
P) {
4341 Instruction *
I = cast<Instruction>(S.getUse()->getUser());
4342 if (!S.isSplittable() || S.endOffset() <=
P.endOffset()) {
4346 if (
auto *LI = dyn_cast<LoadInst>(
I))
4347 UnsplittableLoads.
insert(LI);
4348 else if (
auto *SI = dyn_cast<StoreInst>(
I))
4349 if (
auto *LI = dyn_cast<LoadInst>(
SI->getValueOperand()))
4350 UnsplittableLoads.
insert(LI);
4353 assert(
P.endOffset() > S.beginOffset() &&
4354 "Empty or backwards partition!");
4357 if (
auto *LI = dyn_cast<LoadInst>(
I)) {
4363 auto IsLoadSimplyStored = [](
LoadInst *LI) {
4365 auto *
SI = dyn_cast<StoreInst>(LU);
4366 if (!SI || !
SI->isSimple())
4371 if (!IsLoadSimplyStored(LI)) {
4372 UnsplittableLoads.
insert(LI);
4377 }
else if (
auto *SI = dyn_cast<StoreInst>(
I)) {
4378 if (S.getUse() != &
SI->getOperandUse(
SI->getPointerOperandIndex()))
4381 auto *StoredLoad = dyn_cast<LoadInst>(
SI->getValueOperand());
4382 if (!StoredLoad || !StoredLoad->isSimple())
4384 assert(!
SI->isVolatile() &&
"Cannot split volatile stores!");
4394 auto &
Offsets = SplitOffsetsMap[
I];
4396 "Should not have splits the first time we see an instruction!");
4398 Offsets.Splits.push_back(
P.endOffset() - S.beginOffset());
4403 for (Slice *S :
P.splitSliceTails()) {
4404 auto SplitOffsetsMapI =
4405 SplitOffsetsMap.
find(cast<Instruction>(S->getUse()->getUser()));
4406 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
4408 auto &
Offsets = SplitOffsetsMapI->second;
4412 "Cannot have an empty set of splits on the second partition!");
4414 P.beginOffset() -
Offsets.S->beginOffset() &&
4415 "Previous split does not end where this one begins!");
4419 if (S->endOffset() >
P.endOffset())
4431 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4434 if (UnsplittableLoads.
count(LI))
4437 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
4438 if (LoadOffsetsI == SplitOffsetsMap.
end())
4440 auto &LoadOffsets = LoadOffsetsI->second;
4443 auto &StoreOffsets = SplitOffsetsMap[
SI];
4448 if (LoadOffsets.Splits == StoreOffsets.Splits)
4452 <<
" " << *LI <<
"\n"
4453 <<
" " << *SI <<
"\n");
4459 UnsplittableLoads.
insert(LI);
4467 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4468 return UnsplittableLoads.
count(LI);
4473 return UnsplittableLoads.
count(LI);
4483 IRBuilderTy IRB(&AI);
4501 std::vector<LoadInst *> SplitLoads;
4506 auto &
Offsets = SplitOffsetsMap[LI];
4507 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
4509 "Load must have type size equal to store size");
4511 "Load must be >= slice size");
4514 assert(BaseOffset + SliceSize > BaseOffset &&
4515 "Cannot represent alloca access size using 64-bit integers!");
4518 IRB.SetInsertPoint(LI);
4528 LoadInst *PLoad = IRB.CreateAlignedLoad(
4531 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4532 PartPtrTy,
BasePtr->getName() +
"."),
4535 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4536 LLVMContext::MD_access_group});
4540 SplitLoads.push_back(PLoad);
4544 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4548 <<
", " << NewSlices.
back().endOffset()
4549 <<
"): " << *PLoad <<
"\n");
4564 bool DeferredStores =
false;
4567 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
4568 DeferredStores =
true;
4574 Value *StoreBasePtr =
SI->getPointerOperand();
4575 IRB.SetInsertPoint(SI);
4578 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
4583 auto *PartPtrTy =
SI->getPointerOperandType();
4585 auto AS =
SI->getPointerAddressSpace();
4586 StoreInst *PStore = IRB.CreateAlignedStore(
4589 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4590 PartPtrTy, StoreBasePtr->
getName() +
"."),
4593 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4594 LLVMContext::MD_access_group,
4595 LLVMContext::MD_DIAssignID});
4600 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
4607 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4608 ResplitPromotableAllocas.
insert(OtherAI);
4609 Worklist.insert(OtherAI);
4610 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4612 Worklist.insert(OtherAI);
4616 DeadInsts.push_back(SI);
4621 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
4624 DeadInsts.push_back(LI);
4634 auto *LI = cast<LoadInst>(
SI->getValueOperand());
4638 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
4642 "Slice size should always match load size exactly!");
4644 assert(BaseOffset + StoreSize > BaseOffset &&
4645 "Cannot represent alloca access size using 64-bit integers!");
4648 Instruction *StoreBasePtr = cast<Instruction>(
SI->getPointerOperand());
4653 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
4654 std::vector<LoadInst *> *SplitLoads =
nullptr;
4655 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
4656 SplitLoads = &SplitLoadsMapI->second;
4658 "Too few split loads for the number of splits in the store!");
4668 auto *StorePartPtrTy =
SI->getPointerOperandType();
4673 PLoad = (*SplitLoads)[
Idx];
4675 IRB.SetInsertPoint(LI);
4677 PLoad = IRB.CreateAlignedLoad(
4680 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4681 LoadPartPtrTy, LoadBasePtr->
getName() +
"."),
4684 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4685 LLVMContext::MD_access_group});
4689 IRB.SetInsertPoint(SI);
4690 auto AS =
SI->getPointerAddressSpace();
4691 StoreInst *PStore = IRB.CreateAlignedStore(
4694 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4695 StorePartPtrTy, StoreBasePtr->
getName() +
"."),
4698 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4699 LLVMContext::MD_access_group});
4703 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4707 <<
", " << NewSlices.
back().endOffset()
4708 <<
"): " << *PStore <<
"\n");
4729 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
4730 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
4731 ResplitPromotableAllocas.
insert(OtherAI);
4732 Worklist.insert(OtherAI);
4733 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4735 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
4736 Worklist.insert(OtherAI);
4751 DeadInsts.push_back(LI);
4753 DeadInsts.push_back(SI);
4762 AS.insert(NewSlices);
4766 for (
auto I = AS.begin(), E = AS.end();
I != E; ++
I)
4773 return ResplitPromotableAllocas.
count(AI);
4794 Type *SliceTy =
nullptr;
4797 std::pair<Type *, IntegerType *> CommonUseTy =
4800 if (CommonUseTy.first)