#include "llvm/Config/llvm-config.h"

using namespace llvm::sroa;

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
110 "Number of loads rewritten into predicated loads to allow promotion");
113 "Number of stores rewritten into predicated loads to allow promotion");
115STATISTIC(NumVectorized,
"Number of vectorized aggregates");
static std::optional<DIExpression::FragmentInfo>
calculateFragment(uint64_t NewStorageSliceOffsetInBits,
                  uint64_t NewStorageSliceSizeInBits,
                  std::optional<DIExpression::FragmentInfo> StorageFragment,
                  std::optional<DIExpression::FragmentInfo> CurrentFragment) {
  DIExpression::FragmentInfo Target;
  // If the base storage describes part of the variable, apply the offset and
  // the size constraint.
  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  // Nothing more to do if there is no current fragment, or the current
  // fragment already exactly describes the new assignment.
  if (!CurrentFragment || *CurrentFragment == Target)
    return Target;

  // The new fragment must fit wholly within the existing fragment.
  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
    return std::nullopt;

  return Target;
}
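// Illustrative example (editor's note, not part of the original source):
// rewriting an 8-byte slice that starts 8 bytes into a 16-byte alloca passes
// NewStorageSliceOffsetInBits = 64 and NewStorageSliceSizeInBits = 64. With
// no StorageFragment the target fragment becomes {Offset = 64, Size = 64};
// if the variable's current fragment already covers bits [0, 128) the target
// nests inside it and is returned, otherwise std::nullopt signals that the
// debug info cannot be narrowed safely.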
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit,
                             uint64_t OldAllocaOffsetInBits,
                             uint64_t SliceSizeInBits, Instruction *OldInst,
                             Instruction *Inst, Value *Dest, Value *Value,
                             const DataLayout &DL) {
  // Nothing to do if the rewritten instruction has no linked dbg.assign
  // markers.
  if (MarkerRange.empty())
    return;

  LLVM_DEBUG(dbgs() << "    OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
                    << "\n");
  LLVM_DEBUG(dbgs() << "    SliceSizeInBits: " << SliceSizeInBits << "\n");

  for (auto *DAI : at::getAssignmentMarkers(OldAlloca))
    BaseFragments[getAggregateVariable(DAI)] =
        DAI->getExpression()->getFragmentInfo();
    auto *Expr = DbgAssign->getExpression();

    std::optional<DIExpression::FragmentInfo> BaseFragment = std::nullopt;
    {
      auto R = BaseFragments.find(getAggregateVariable(DbgAssign));
      if (R == BaseFragments.end())
        return;
      BaseFragment = R->second;
    }
    std::optional<DIExpression::FragmentInfo> CurrentFragment =
        Expr->getFragmentInfo();
    std::optional<DIExpression::FragmentInfo> NewFragment =
        calculateFragment(OldAllocaOffsetInBits, SliceSizeInBits,
                          BaseFragment, CurrentFragment);

    if (!(NewFragment == CurrentFragment)) {
      if (CurrentFragment) {
        // Rewrite NewFragment to be relative to the existing fragment.
        NewFragment->OffsetInBits -= CurrentFragment->OffsetInBits;
      }
      auto E = DIExpression::createFragmentExpression(
          Expr, NewFragment->OffsetInBits, NewFragment->SizeInBits);
      assert(E && "Failed to create fragment expr!");
    }

    Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);

    auto *NewAssign = DIB.insertDbgAssign(
        Inst, Value, DbgAssign->getVariable(), Expr, Dest,
        DIExpression::get(Ctx, std::nullopt), DbgAssign->getDebugLoc());

    NewAssign->moveBefore(DbgAssign);
    NewAssign->setDebugLoc(DbgAssign->getDebugLoc());
    LLVM_DEBUG(dbgs() << "Created new assign intrinsic: " << *NewAssign
                      << "\n");
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }
  bool operator<(const Slice &RHS) const {
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;
  }
    return LHS.beginOffset() < RHSOffset;

    return LHSOffset < RHS.beginOffset();

    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
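  // Ordering example (illustrative, not in the original source): for slices
  // [0,8) unsplittable, [0,8) splittable and [4,8), operator< orders them
  //   [0,8) unsplittable < [0,8) splittable < [4,8)
  // i.e. by start offset first, with unsplittable uses ahead of splittable
  // ones at the same start and, among equals, larger slices first.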
    int OldSize = Slices.size();
    auto SliceI = Slices.begin() + OldSize;
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());

  class partition_iterator;

    return DeadUseIfPromotable;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  template <typename DerivedT, typename RetT = void> class BuilderBase;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  uint64_t BeginOffset = 0, EndOffset = 0;

    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;

  iterator end() const { return SJ; }
  uint64_t MaxSplitSliceEndOffset = 0;

    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;
      } else {
        llvm::erase_if(P.SplitTails,
                       [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
        assert(llvm::any_of(P.SplitTails,
                            [&](Slice *S) {
                              return S->endOffset() == MaxSplitSliceEndOffset;
                            }) &&
               "Could not find the current max split slice offset!");
        assert(llvm::all_of(P.SplitTails,
                            [&](Slice *S) {
                              return S->endOffset() <= MaxSplitSliceEndOffset;
                            }) &&
               "Max split slice end offset is not actually the max!");
      }
    }

    assert(P.SplitTails.empty() && "Failed to clear the split slices!");

      if (S.isSplittable() && S.endOffset() > P.EndOffset) {
        P.SplitTails.push_back(&S);
        MaxSplitSliceEndOffset =
            std::max(S.endOffset(), MaxSplitSliceEndOffset);
      }

      P.BeginOffset = P.EndOffset;
      P.EndOffset = MaxSplitSliceEndOffset;
      if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
          !P.SI->isSplittable()) {
        P.BeginOffset = P.EndOffset;
        P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {
      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      P.EndOffset = P.SJ->beginOffset();

           "End iterators don't match between compared partition iterators!");

    if (P.SI == RHS.P.SI &&
        P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "

                            partition_iterator(end(), end()));
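  // Example of partition formation (illustrative, not in the original
  // source): given slices [0,4) unsplittable, [0,16) splittable and [8,12)
  // unsplittable, the iterator emits partitions [0,4), [4,8), [8,12) and
  // [12,16); the splittable [0,16) slice contributes a "split tail" to every
  // partition it spans beyond the one in which it begins.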
  if (ConstantInt *CI = dyn_cast<ConstantInt>(SI.getCondition()))
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  if (PHINode *PN = dyn_cast<PHINode>(&I)) {
    // If PN merges together the same value, return that value.
    return PN->hasConstantValue();
        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),

    if (VisitedDeadInsts.insert(&I).second)

                 bool IsSplittable = false) {
                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << I << "\n");
      return markAsDead(I);

    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                        << Offset << " to remain within the " << AllocSize
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << I << "\n");
      EndOffset = AllocSize;

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));

      return markAsDead(BC);
    return Base::visitBitCastInst(BC);

      return markAsDead(ASC);
    return Base::visitAddrSpaceCastInst(ASC);

      return markAsDead(GEPI);

      ConstantInt *OpC = dyn_cast<ConstantInt>(GTI.getOperand());
      if (StructType *STy = GTI.getStructTypeOrNull()) {
          DL.getTypeAllocSize(GTI.getIndexedType()).getFixedValue());

    if (GEPOffset.ugt(AllocSize))
      return markAsDead(GEPI);
    return Base::visitGetElementPtrInst(GEPI);
954 "All simple FCA loads should have been pre-split");
957 return PI.setAborted(&LI);
960 if (
Size.isScalable())
961 return PI.setAborted(&LI);
968 Value *ValOp =
SI.getValueOperand();
970 return PI.setEscapedAndAborted(&SI);
972 return PI.setAborted(&SI);
976 return PI.setAborted(&SI);
989 <<
Offset <<
" which extends past the end of the "
990 << AllocSize <<
" byte alloca:\n"
991 <<
" alloca: " << AS.AI <<
"\n"
992 <<
" use: " << SI <<
"\n");
993 return markAsDead(SI);
997 "All simple FCA stores should have been pre-split");
1005 (IsOffsetKnown &&
Offset.uge(AllocSize)))
1007 return markAsDead(II);
1010 return PI.setAborted(&II);
1013 : AllocSize -
Offset.getLimitedValue(),
1021 return markAsDead(II);
1025 if (VisitedDeadInsts.
count(&II))
1029 return PI.setAborted(&II);
1036 if (
Offset.uge(AllocSize)) {
1038 MemTransferSliceMap.
find(&II);
1039 if (MTPI != MemTransferSliceMap.
end())
1040 AS.Slices[MTPI->second].kill();
1041 return markAsDead(II);
1052 return markAsDead(II);
1061 std::tie(MTPI, Inserted) =
1062 MemTransferSliceMap.
insert(std::make_pair(&II, AS.Slices.
size()));
1063 unsigned PrevIdx = MTPI->second;
1065 Slice &PrevP = AS.Slices[PrevIdx];
1069 if (!II.
isVolatile() && PrevP.beginOffset() == RawOffset) {
1071 return markAsDead(II);
1076 PrevP.makeUnsplittable();
1083 assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
1084 "Map index doesn't point back to a slice with this user.");
1093 AS.DeadUseIfPromotable.push_back(U);
1098 return PI.setAborted(&II);
1103 Length->getLimitedValue());
1113 Base::visitIntrinsicInst(II);
    Uses.push_back(std::make_pair(cast<Instruction>(*U), Root));

    std::tie(UsedI, I) = Uses.pop_back_val();

    if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
      Size = std::max(Size,
                      DL.getTypeStoreSize(LI->getType()).getFixedValue());

    if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
      Size = std::max(Size,
                      DL.getTypeStoreSize(Op->getType()).getFixedValue());

      if (!GEP->hasAllZeroIndices())
    } else if (!isa<BitCastInst>(I) && !isa<PHINode>(I) &&
               !isa<SelectInst>(I) && !isa<AddrSpaceCastInst>(I)) {
    }

    for (User *U : I->users())
      if (Visited.insert(cast<Instruction>(U)).second)
        Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
  } while (!Uses.empty());

  assert(isa<PHINode>(I) || isa<SelectInst>(I));
    return markAsDead(I);
    if (isa<PHINode>(I) &&
        I.getParent()->getFirstInsertionPt() == I.getParent()->end())
      return PI.setAborted(&I);

      AS.DeadOperands.push_back(U);

      return PI.setAborted(&I);
      return PI.setAborted(UnsafeI);

    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

      PointerEscapingInstr(nullptr) {
  SliceBuilder PB(DL, AI, *this);
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");

  OS << Indent << "  used by: " << *I->getUse()->getUser() << "\n";

  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << "  A pointer to this alloca escaped by:\n"
       << "  " << *PointerEscapingInstr << "\n";

  OS << "Slices of alloca: " << AI << "\n";
static std::pair<Type *, IntegerType *>

  bool TyIsCommon = true;

    Use *U = I->getUse();
    if (isa<IntrinsicInst>(*U->getUser()))
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

    Type *UserTy = nullptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
      UserTy = LI->getType();
    } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
      UserTy = SI->getValueOperand()->getType();
    }

    if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
    }

    if (!UserTy || (Ty && Ty != UserTy))

  return {TyIsCommon ? Ty : nullptr, ITy};
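// Example (illustrative, not in the original source): if one user loads the
// partition as i32 and another stores it as float, there is no single common
// type (Ty stays nullptr), but ITy still records the widest whole-byte
// integer access, which later enables integer widening even when vector
// promotion is not possible.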
  Type *LoadType = nullptr;
    LoadInst *LI = dyn_cast<LoadInst>(U);

      if (LoadType != LI->getType())

    if (BBI->mayWriteToMemory())

    MaxAlign = std::max(MaxAlign, LI->getAlign());

      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

  IRB.SetInsertPoint(&PN);
                                        PN.getName() + ".sroa.speculated");

    IRB.SetInsertPoint(TI);
    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
    ++NumLoadsSpeculated;
      Load->setAAMetadata(AATags);
    InjectedLoads[Pred] = Load;
sroa::SelectHandSpeculativity &
sroa::SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
  if (isTrueVal)
    Bitfield::set<sroa::SelectHandSpeculativity::TrueVal>(Storage, true);
  else
    Bitfield::set<sroa::SelectHandSpeculativity::FalseVal>(Storage, true);
  return *this;
}

bool sroa::SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
  return isTrueVal
             ? Bitfield::get<sroa::SelectHandSpeculativity::TrueVal>(Storage)
             : Bitfield::get<sroa::SelectHandSpeculativity::FalseVal>(Storage);
}

bool sroa::SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(/*isTrueVal=*/true) &&
         isSpeculatable(/*isTrueVal=*/false);
}

bool sroa::SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(/*isTrueVal=*/true) ||
         isSpeculatable(/*isTrueVal=*/false);
}

bool sroa::SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}

static sroa::SelectHandSpeculativity

  sroa::SelectHandSpeculativity Spec;

      Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
std::optional<sroa::RewriteableMemOps>

  for (User *U : SI.users()) {
    if (auto *BC = dyn_cast<BitCastInst>(U); BC && BC->hasOneUse())

    if (auto *Store = dyn_cast<StoreInst>(U)) {
      Ops.emplace_back(Store);

    auto *LI = dyn_cast<LoadInst>(U);
      Ops.emplace_back(Load);

    sroa::SelectHandSpeculativity Spec =
      Ops.emplace_back(Load);

  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

  IRB.SetInsertPoint(&LI);
    TV = IRB.CreateBitOrPointerCast(TV, TypedPtrTy, "");
    FV = IRB.CreateBitOrPointerCast(FV, TypedPtrTy, "");
                                    LI.getName() + ".sroa.speculate.load.true");
                                    LI.getName() + ".sroa.speculate.load.false");
  NumLoadsSpeculated += 2;

  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                              LI.getName() + ".sroa.speculated");
template <typename T>
                              sroa::SelectHandSpeculativity Spec,
  assert((isa<LoadInst>(I) || isa<StoreInst>(I)) && "Only for load and store!");

  if (Spec.areNoneSpeculatable())
        SI.getMetadata(LLVMContext::MD_prof), &DTU);
        SI.getMetadata(LLVMContext::MD_prof), &DTU,
    if (Spec.isSpeculatable(/*isTrueVal=*/true))

  if (isa<LoadInst>(I))

    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
      if (isa<LoadInst>(I))
        ++NumLoadsPredicated;
      else
        ++NumStoresPredicated;

      ++NumLoadsSpeculated;

    auto &CondMemOp = cast<T>(*I.clone());
    CondMemOp.insertBefore(NewMemOpBB->getTerminator());
    if (auto *PtrTy = Ptr->getType();
        !PtrTy->isOpaquePointerTy() &&
        PtrTy != CondMemOp.getPointerOperandType())
          Ptr, CondMemOp.getPointerOperandType(), "", &CondMemOp);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
    if (isa<LoadInst>(I)) {
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");

  if (isa<LoadInst>(I)) {
    I.replaceAllUsesWith(PN);
                                  sroa::SelectHandSpeculativity Spec,
  if (auto *LI = dyn_cast<LoadInst>(&I))
  else if (auto *SI = dyn_cast<StoreInst>(&I))

                                     const sroa::RewriteableMemOps &Ops,
  bool CFGChanged = false;

  for (const RewriteableMemOp &Op : Ops) {
    sroa::SelectHandSpeculativity Spec;
    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();

    if (Spec.areAllSpeculatable()) {
      assert(DTU && "Should not get here when not allowed to modify the CFG!");
    I->eraseFromParent();

    cast<BitCastInst>(U)->eraseFromParent();
  SI.eraseFromParent();
1731 const Twine &NamePrefix) {
1732 assert(
Ptr->getType()->isOpaquePointerTy() &&
1733 "Only opaque pointers supported");
1735 Ptr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(),
Ptr, IRB.getInt(
Offset),
1736 NamePrefix +
"sroa_idx");
1737 return IRB.CreatePointerBitCastOrAddrSpaceCast(
Ptr,
PointerTy,
1738 NamePrefix +
"sroa_cast");
1759 if (isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) {
1762 "We can't have the same bitwidth for different int types");
1766 if (
DL.getTypeSizeInBits(NewTy).getFixedValue() !=
1767 DL.getTypeSizeInBits(OldTy).getFixedValue())
1783 return OldAS == NewAS ||
1784 (!
DL.isNonIntegralAddressSpace(OldAS) &&
1785 !
DL.isNonIntegralAddressSpace(NewAS) &&
1786 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1792 return !
DL.isNonIntegralPointerType(NewTy);
1796 if (!
DL.isNonIntegralPointerType(OldTy))
1816 Type *OldTy = V->getType();
1822 assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
1823 "Integer types must be the exact same to convert.");
1831 return IRB.CreateIntToPtr(IRB.CreateBitCast(V,
DL.getIntPtrType(NewTy)),
1841 return IRB.CreateBitCast(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
1854 if (OldAS != NewAS) {
1855 assert(
DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1856 return IRB.CreateIntToPtr(IRB.CreatePtrToInt(V,
DL.getIntPtrType(OldTy)),
1861 return IRB.CreateBitCast(V, NewTy);
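// Example (illustrative, not in the original source): converting an i64
// value into a pointer type for an 8-byte slice in a 64-bit address space
// takes the inttoptr path above, e.g.
//   %p = inttoptr i64 %v to ptr
// whereas an i64 <-> double conversion is handled by the final plain bitcast.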
  uint64_t BeginOffset =
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
      BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
    return false;
  uint64_t EndOffset =
      std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||
      EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
    return false;

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()

  Use *U = S.getUse();

    if (MI->isVolatile())
      return false;
    if (!S.isSplittable())
      return false;
  } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
  } else if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    if (SI->isVolatile())
      return false;
    Type *STy = SI->getValueOperand()->getType();
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {

  uint64_t ElementSize =
      DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

  if (ElementSize % 8)
    return false;
  assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
         "vector size not a multiple of element size?");

  for (const Slice &S : P)
  for (const Slice *S : P.splitSliceTails())
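// Example (illustrative, not in the original source): with a candidate type
// <4 x i32> (ElementSize = 4 bytes), a slice covering partition bytes [4, 12)
// maps to BeginIndex = 1 and EndIndex = 3 and is viable, while a slice
// covering bytes [6, 10) is rejected because 6 is not a multiple of the
// element size.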
  Type *CommonEltTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      if (!CandidateTys.empty()) {
        if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
            DL.getTypeSizeInBits(V).getFixedValue()) {
          CandidateTys.clear();

      Type *EltTy = VTy->getElementType();
        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;

  for (const Slice &S : P) {
    if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
    else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
      Ty = SI->getValueOperand()->getType();

    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);

  for (Type *Ty : LoadStoreTys) {
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
        CheckCandidateType(NewVTy);

  if (CandidateTys.empty())

  if (HaveVecPtrTy && !HaveCommonVecPtrTy)

  if (!HaveCommonEltTy && HaveVecPtrTy) {
    CandidateTys.clear();
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
      if (!VTy->getElementType()->isIntegerTy())
              VTy->getContext(), VTy->getScalarSizeInBits())));

    assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
               DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
           "Cannot have vector types of different sizes!");
    assert(RHSTy->getElementType()->isIntegerTy() &&
           "All non-integer types eliminated!");
    assert(LHSTy->getElementType()->isIntegerTy() &&
           "All non-integer types eliminated!");
    return cast<FixedVectorType>(RHSTy)->getNumElements() <
           cast<FixedVectorType>(LHSTy)->getNumElements();

    assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
               DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
           "Cannot have vector types of different sizes!");
    assert(RHSTy->getElementType()->isIntegerTy() &&
           "All non-integer types eliminated!");
    assert(LHSTy->getElementType()->isIntegerTy() &&
           "All non-integer types eliminated!");
    return cast<FixedVectorType>(RHSTy)->getNumElements() ==
           cast<FixedVectorType>(LHSTy)->getNumElements();

  llvm::sort(CandidateTys, RankVectorTypesComp);
  CandidateTys.erase(std::unique(CandidateTys.begin(), CandidateTys.end(),
                     CandidateTys.end());

    assert(VTy->getElementType() == CommonEltTy &&
           "Unaccounted for element type!");
    assert(VTy == CandidateTys[0] &&
           "Different vector types with the same element type!");
    CandidateTys.resize(1);

    return cast<FixedVectorType>(VTy)->getNumElements() >
           std::numeric_limits<unsigned short>::max();
2138 bool &WholeAllocaOp) {
2141 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2142 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2144 Use *U = S.getUse();
2150 if (
IntrinsicInst *II = dyn_cast<IntrinsicInst>(U->getUser())) {
2160 if (
LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
2164 if (
DL.getTypeStoreSize(LI->
getType()).getFixedValue() >
Size)
2168 if (S.beginOffset() < AllocBeginOffset)
2173 if (!isa<VectorType>(LI->
getType()) && RelBegin == 0 && RelEnd ==
Size)
2174 WholeAllocaOp =
true;
2176 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2178 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2184 }
else if (
StoreInst *
SI = dyn_cast<StoreInst>(U->getUser())) {
2185 Type *ValueTy =
SI->getValueOperand()->getType();
2186 if (
SI->isVolatile())
2189 if (
DL.getTypeStoreSize(ValueTy).getFixedValue() >
Size)
2193 if (S.beginOffset() < AllocBeginOffset)
2198 if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd ==
Size)
2199 WholeAllocaOp =
true;
2200 if (
IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
2201 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2203 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2209 }
else if (
MemIntrinsic *
MI = dyn_cast<MemIntrinsic>(U->getUser())) {
2210 if (
MI->isVolatile() || !isa<Constant>(
MI->getLength()))
2212 if (!S.isSplittable())
2229 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2235 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2253 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2255 for (
const Slice &S :
P)
2260 for (
const Slice *S :
P.splitSliceTails())
2265 return WholeAllocaOp;
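// Example (illustrative, not in the original source): an alloca of type
// { i16, i16 } accessed by whole-width i32 loads and stores plus byte-aligned
// partial accesses can be widened to a single i32, because at least one
// access covers the full allocation (WholeAllocaOp) and every other access
// stays within whole bytes of it.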
  IntegerType *IntTy = cast<IntegerType>(V->getType());
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
         "Cannot extract to a larger integer!");
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

  IntegerType *Ty = cast<IntegerType>(V->getType());
         "Cannot insert a larger integer!");
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
  V = IRB.CreateOr(Old, V, Name + ".insert");
  auto *VecTy = cast<FixedVectorType>(V->getType());
  unsigned NumElements = EndIndex - BeginIndex;
  assert(NumElements <= VecTy->getNumElements() && "Too many elements!");

  if (NumElements == VecTy->getNumElements())
  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  auto Mask = llvm::to_vector<8>(llvm::seq<int>(BeginIndex, EndIndex));
  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {
  assert(VecTy && "Can only insert a vector into a vector");

  VectorType *Ty = dyn_cast<VectorType>(V->getType());
    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

  assert(cast<FixedVectorType>(Ty)->getNumElements() <=
             cast<FixedVectorType>(VecTy)->getNumElements() &&
         "Too many elements!");
  if (cast<FixedVectorType>(Ty)->getNumElements() ==
      cast<FixedVectorType>(VecTy)->getNumElements()) {
    assert(V->getType() == VecTy && "Vector type mismatch");

  unsigned EndIndex = BeginIndex + cast<FixedVectorType>(Ty)->getNumElements();

  Mask.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

  Mask2.reserve(cast<FixedVectorType>(VecTy)->getNumElements());
  for (unsigned i = 0; i != cast<FixedVectorType>(VecTy)->getNumElements(); ++i)
    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
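// Example (illustrative, not in the original source): inserting a <2 x i32>
// value at BeginIndex = 1 of a <4 x i32> alloca first widens the value with
// the ".expand" shuffle and then selects lanes with a per-lane i1 mask of
// {false, true, true, false}, so only elements 1 and 2 of the destination
// vector are replaced.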
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);

                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
              ? Type::getIntNTy(NewAI.getContext(),
                                DL.getTypeSizeInBits(NewAI.getAllocatedType())
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
    IsSplit =
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << "  rewriting " << (IsSplit ? "split " : ""));

    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << "   Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
                      << ")\n");
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();
    OldPtr = cast<Instruction>(OldUse->get());

    Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(

    CanSROA &= visit(cast<Instruction>(OldUse->getUser()));
    assert(IsSplit || BeginOffset == NewBeginOffset);

    size_t LastSROAPrefix = OldName.rfind(".sroa.");
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
        OldName = OldName.substr(IndexEnd + 1);
        OldName = OldName.substr(OffsetEnd + 1);
    OldName = OldName.substr(0, OldName.find(".sroa_"));

                       Twine(OldName) + "."
  Align getSliceAlign() {
    return commonAlignment(NewAI.getAlign(),
                           NewBeginOffset - NewAllocaBeginOffset);
  }

    assert(VecTy && "Can only call getIndex when rewriting a vector");
    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");

  void deleteIfTriviallyDead(Value *V) {
      Pass.DeadInsts.push_back(I);

    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");
    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");

    assert(IntTy && "We cannot insert an integer to the alloca");
    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
    assert(cast<IntegerType>(LI.getType())->getBitWidth() >= SliceSize * 8 &&
           "Can only handle an extract for an overly wide load");
    if (cast<IntegerType>(LI.getType())->getBitWidth() > SliceSize * 8)
      V = IRB.CreateZExt(V, LI.getType());

    const bool IsLoadPastEnd =
        DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize;
    bool IsPtrAdjusted = false;
      V = rewriteVectorizedLoadInst(LI);
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&

      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
          if (AITy->getBitWidth() < TITy->getBitWidth()) {
            V = IRB.CreateZExt(V, TITy, "load.ext");
            if (DL.isBigEndian())
              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});
      IsPtrAdjusted = true;

           "Only integer type loads and stores are split");
      assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
             "Split load isn't smaller than original load");
             "Non-byte-multiple bit width");

      Placeholder->replaceAllUsesWith(&LI);
      Placeholder->deleteValue();

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
    if (V->getType() != VecTy) {
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
      assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
             "Too many elements!");
      Type *SliceTy = (NumElements == 1)
      if (V->getType() != SliceTy)

    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
    Pass.DeadInsts.push_back(&SI);

    migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
                     Store, Store->getPointerOperand(), OrigV, DL);

    assert(IntTy && "We cannot extract an integer from the alloca");
    if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

    migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,
                     Store, Store->getPointerOperand(),
                     Store->getValueOperand(), DL);

    Pass.DeadInsts.push_back(&SI);
    Value *OldOp = SI.getOperand(1);

    if (V->getType()->isPointerTy())
      if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
        Pass.PostPromotionWorklist.insert(AI);

    if (SliceSize < DL.getTypeStoreSize(V->getType()).getFixedValue()) {
      assert(V->getType()->isIntegerTy() &&
             "Only integer type loads and stores are split");
      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
             "Non-byte-multiple bit width");

      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
    if (IntTy && V->getType()->isIntegerTy())
      return rewriteIntegerStore(V, SI, AATags);

    const bool IsStorePastEnd =
        DL.getTypeStoreSize(V->getType()).getFixedValue() > SliceSize;
    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&
         V->getType()->isIntegerTy()))) {

      if (auto *VITy = dyn_cast<IntegerType>(V->getType()))
        if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
          if (VITy->getBitWidth() > AITy->getBitWidth()) {
            if (DL.isBigEndian())
              V = IRB.CreateLShr(V, VITy->getBitWidth() - AITy->getBitWidth(),
            V = IRB.CreateTrunc(V, AITy, "load.trunc");

      Value *NewPtr =
          getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
      NewSI =
          IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
    } else {
      unsigned AS = SI.getPointerAddressSpace();
      Value *NewPtr = getNewAllocaSlicePtr(IRB, V->getType()->getPointerTo(AS));
      NewSI =
          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
    }
    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    if (SI.isVolatile())

    migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &SI,

    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);
  assert(Size > 0 && "Expected a positive number of bytes.");
                         IRB.CreateZExt(V, SplatIntTy, "zext"),
    V = IRB.CreateVectorSplat(NumElements, V, "vsplat");

    if (!isa<ConstantInt>(II.getLength())) {
      assert(NewBeginOffset == BeginOffset);
             "AT: Unexpected link to non-const GEP");
      deleteIfTriviallyDead(OldPtr);

    Pass.DeadInsts.push_back(&II);

    const bool CanContinue = [&]() {
      if (BeginOffset > NewAllocaBeginOffset ||
          EndOffset < NewAllocaEndOffset)
      if (Len > std::numeric_limits<unsigned>::max())
             DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

      migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
                       New, New->getRawDest(), nullptr, DL);

      assert(ElementTy == ScalarTy);
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
      assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
             "Too many elements!");

      Value *Splat = getIntegerSplat(
          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
      if (NumElements > 1)

    if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                  EndOffset != NewAllocaEndOffset)) {
        assert(V->getType() == IntTy &&
               "Wrong type for an alloca wide integer!");
      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);
                              DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
      if (VectorType *AllocaVecTy = dyn_cast<VectorType>(AllocaTy))
        V = getVectorSplat(
            V, cast<FixedVectorType>(AllocaVecTy)->getNumElements());

    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                           LLVMContext::MD_access_group});
      New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

    migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
                     New, New->getPointerOperand(), V, DL);
    Align SliceAlign = getSliceAlign();
    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

        if (any_of(DAI->location_ops(),
                   [&](Value *V) { return V == II.getDest(); }) ||
            DAI->getAddress() == II.getDest())
          DAI->replaceVariableLocationOp(II.getDest(), AdjustedPtr);

      deleteIfTriviallyDead(OldPtr);

      (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

    if (EmitMemCpy && &OldAI == &NewAI) {
      assert(NewBeginOffset == BeginOffset);

      if (NewEndOffset != EndOffset)
                                      NewEndOffset - NewBeginOffset));

      Pass.DeadInsts.push_back(&II);

      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);

    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      Value *DestPtr, *SrcPtr;
        DestAlign = SliceAlign;
        SrcAlign = OtherAlign;
        DestAlign = OtherAlign;
        SrcAlign = SliceAlign;

      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

        migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8,
                         &II, New, DestPtr, nullptr, DL);
        migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8,
                         SliceSize * 8, &II, New, DestPtr, nullptr, DL);
    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;
    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;

    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
    } else if (IntTy && !IsWholeAlloca) {
      OtherTy = NewAllocaTy;

    if (VecTy && !IsWholeAlloca && !IsDest) {
    } else if (IntTy && !IsWholeAlloca && !IsDest) {

      LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
        Load->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

    if (VecTy && !IsWholeAlloca && IsDest) {
    } else if (IntTy && !IsWholeAlloca && IsDest) {

        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
      Store->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

      migrateDebugInfo(&OldAI, IsSplit, NewBeginOffset * 8, SliceSize * 8, &II,
                       Store, DstPtr, Src, DL);
      migrateDebugInfo(Base, IsSplit, Offset.getZExtValue() * 8, SliceSize * 8,
                       &II, Store, DstPtr, Src, DL);

           "Unexpected intrinsic!");
    Pass.DeadInsts.push_back(&II);

    if (NewBeginOffset != NewAllocaBeginOffset ||
        NewEndOffset != NewAllocaEndOffset)
                            NewEndOffset - NewBeginOffset);
3365 Uses.push_back(&Root);
3369 if (
LoadInst *LI = dyn_cast<LoadInst>(
I)) {
3373 if (
StoreInst *SI = dyn_cast<StoreInst>(
I)) {
3374 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
3378 assert(isa<BitCastInst>(
I) || isa<AddrSpaceCastInst>(
I) ||
3379 isa<PHINode>(
I) || isa<SelectInst>(
I) ||
3380 isa<GetElementPtrInst>(
I));
3381 for (
User *U :
I->users())
3382 if (Visited.
insert(cast<Instruction>(U)).second)
3383 Uses.push_back(cast<Instruction>(U));
3384 }
while (!
Uses.empty());
  bool visitPHINode(PHINode &PN) {
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    if (isa<PHINode>(OldPtr))

      IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    std::replace(PN.op_begin(), PN.op_end(), cast<Value>(OldPtr), NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(PN);

    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(SI);
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {

  AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
      : DL(DL), IRB(IRB) {}

    bool Changed = false;
    while (!Queue.empty()) {
      U = Queue.pop_back_val();
      Changed |= visit(cast<Instruction>(U->getUser()));

    for (Use &U : I.uses())
      if (Visited.insert(U.getUser()).second)
        Queue.push_back(&U);

  bool visitInstruction(Instruction &I) { return false; }
  template <typename Derived> class OpSplitter {

          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);

        return static_cast<Derived *>(this)->emitFunc(

      if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

      if (StructType *STy = dyn_cast<StructType>(Ty)) {
        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

      Value *GEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
      LoadInst *Load =
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);

      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
          AATags(AATags), AggStore(AggStore) {}

      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
      StoreInst *Store =
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
      if (auto *OldAI = dyn_cast<AllocaInst>(Base)) {
        uint64_t SizeInBits =
            DL.getTypeSizeInBits(Store->getValueOperand()->getType());
        migrateDebugInfo(OldAI, true, Offset.getZExtValue() * 8,
                         SizeInBits, AggStore, Store,
                         Store->getPointerOperand(), Store->getValueOperand(),
                         DL);
             "AT: unexpected debug.assign linked to store through "

    if (!SI.isSimple() || SI.getPointerOperand() != *U)
    if (V->getType()->isSingleValueType())

    StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");

    SI.eraseFromParent();
                      << "\n original: " << *Sel

    IRB.SetInsertPoint(&GEPI);

    Value *NFalse = IRB.CreateGEP(Ty, False, Index,
                                  False->getName() + ".sroa.gep", IsInBounds);
                                  Sel->getName() + ".sroa.sel");
    Visited.erase(&GEPI);

    enqueueUsers(*NSelI);

                      << "\n " << *NSel << '\n');
      { Instruction *I = dyn_cast<Instruction>(In);
        return !I || isa<GetElementPtrInst>(I) || isa<PHINode>(I) ||
               succ_empty(I->getParent()) ||
               !I->getParent()->isLegalToHoistInto();

                      << "\n original: " << *PHI

                               PHI->getName() + ".sroa.phi");
    for (unsigned I = 0, E = PHI->getNumIncomingValues(); I != E; ++I) {
      Value *NewVal = nullptr;

        IRB.SetInsertPoint(In->getParent(), std::next(In->getIterator()));
        NewVal = IRB.CreateGEP(Ty, In, Index, In->getName() + ".sroa.gep",

    Visited.erase(&GEPI);

    enqueueUsers(*NewPN);

               dbgs() << "\n " << *In;
               dbgs() << "\n " << *NewPN << '\n');

        foldGEPSelect(GEPI))

  bool visitPHINode(PHINode &PN) {
  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();

  if (ArrayType *ArrTy = dyn_cast<ArrayType>(Ty)) {
    InnerTy = ArrTy->getElementType();
  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
    InnerTy = STy->getElementType(Index);

  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())

  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)

  if (isa<ArrayType>(Ty) || isa<VectorType>(Ty)) {
    if (auto *AT = dyn_cast<ArrayType>(Ty)) {
      ElementTy = AT->getElementType();
      TyNumElements = AT->getNumElements();

      auto *VT = cast<FixedVectorType>(Ty);
      ElementTy = VT->getElementType();
      TyNumElements = VT->getNumElements();

    uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
    if (NumSkippedElements >= TyNumElements)
    Offset -= NumSkippedElements * ElementSize;

    if (Size == ElementSize)
    if (NumElements * ElementSize != Size)

  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  if (Offset >= ElementSize)

  if (Size == ElementSize)

  if (Index == EndIndex)
  struct SplitOffsets {
    std::vector<uint64_t> Splits;

  LLVM_DEBUG(dbgs() << "  Searching for candidate loads and stores\n");
  for (Slice &S : P) {
    Instruction *I = cast<Instruction>(S.getUse()->getUser());
    if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
      if (auto *LI = dyn_cast<LoadInst>(I))
        UnsplittableLoads.insert(LI);
      else if (auto *SI = dyn_cast<StoreInst>(I))
        if (auto *LI = dyn_cast<LoadInst>(SI->getValueOperand()))
          UnsplittableLoads.insert(LI);

    assert(P.endOffset() > S.beginOffset() &&
           "Empty or backwards partition!");

    if (auto *LI = dyn_cast<LoadInst>(I)) {
      auto IsLoadSimplyStored = [](LoadInst *LI) {
          auto *SI = dyn_cast<StoreInst>(LU);
          if (!SI || !SI->isSimple())
      if (!IsLoadSimplyStored(LI)) {
        UnsplittableLoads.insert(LI);
    } else if (auto *SI = dyn_cast<StoreInst>(I)) {
      if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
      auto *StoredLoad = dyn_cast<LoadInst>(SI->getValueOperand());
      if (!StoredLoad || !StoredLoad->isSimple())
      assert(!SI->isVolatile() && "Cannot split volatile stores!");

    auto &Offsets = SplitOffsetsMap[I];
           "Should not have splits the first time we see an instruction!");
    Offsets.Splits.push_back(P.endOffset() - S.beginOffset());

  for (Slice *S : P.splitSliceTails()) {
    auto SplitOffsetsMapI =
        SplitOffsetsMap.find(cast<Instruction>(S->getUse()->getUser()));
    if (SplitOffsetsMapI == SplitOffsetsMap.end())
    auto &Offsets = SplitOffsetsMapI->second;
           "Cannot have an empty set of splits on the second partition!");
               P.beginOffset() - Offsets.S->beginOffset() &&
           "Previous split does not end where this one begins!");

    if (S->endOffset() > P.endOffset())

        auto *LI = cast<LoadInst>(SI->getValueOperand());
        if (UnsplittableLoads.count(LI))

        auto LoadOffsetsI = SplitOffsetsMap.find(LI);
        if (LoadOffsetsI == SplitOffsetsMap.end())
        auto &LoadOffsets = LoadOffsetsI->second;
        auto &StoreOffsets = SplitOffsetsMap[SI];

        if (LoadOffsets.Splits == StoreOffsets.Splits)

                          << "      " << *LI << "\n"
                          << "      " << *SI << "\n");
        UnsplittableLoads.insert(LI);

        auto *LI = cast<LoadInst>(SI->getValueOperand());
        return UnsplittableLoads.count(LI);
        return UnsplittableLoads.count(LI);
4196 IRBuilderTy IRB(&AI);
4214 std::vector<LoadInst *> SplitLoads;
4219 auto &
Offsets = SplitOffsetsMap[LI];
4220 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
4222 "Load must have type size equal to store size");
4224 "Load must be >= slice size");
4227 assert(BaseOffset + SliceSize > BaseOffset &&
4228 "Cannot represent alloca access size using 64-bit integers!");
4231 IRB.SetInsertPoint(LI);
4240 auto *PartPtrTy = PartTy->getPointerTo(AS);
4241 LoadInst *PLoad = IRB.CreateAlignedLoad(
4244 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4245 PartPtrTy,
BasePtr->getName() +
"."),
4248 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4249 LLVMContext::MD_access_group});
4253 SplitLoads.push_back(PLoad);
4257 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4261 <<
", " << NewSlices.
back().endOffset()
4262 <<
"): " << *PLoad <<
"\n");
4277 bool DeferredStores =
false;
4280 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
4281 DeferredStores =
true;
4287 Value *StoreBasePtr =
SI->getPointerOperand();
4288 IRB.SetInsertPoint(SI);
4290 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
4298 auto AS =
SI->getPointerAddressSpace();
4299 StoreInst *PStore = IRB.CreateAlignedStore(
4302 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
4303 PartPtrTy, StoreBasePtr->
getName() +
"."),
4306 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4307 LLVMContext::MD_access_group,
4308 LLVMContext::MD_DIAssignID});
4309 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
4316 if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(StoreBasePtr)) {
4317 ResplitPromotableAllocas.
insert(OtherAI);
4318 Worklist.insert(OtherAI);
4319 }
else if (
AllocaInst *OtherAI = dyn_cast<AllocaInst>(
4321 Worklist.insert(OtherAI);
4325 DeadInsts.push_back(SI);
4330 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
4333 DeadInsts.push_back(LI);
    auto *LI = cast<LoadInst>(SI->getValueOperand());
    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
           "Slice size should always match load size exactly!");
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    Instruction *StoreBasePtr = cast<Instruction>(SI->getPointerOperand());

    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
             "Too few split loads for the number of splits in the store!");

      auto *StorePartPtrTy = PartTy->getPointerTo(SI->getPointerAddressSpace());

        PLoad = (*SplitLoads)[Idx];
        IRB.SetInsertPoint(LI);
        PLoad = IRB.CreateAlignedLoad(
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           LoadPartPtrTy, LoadBasePtr->getName() + "."),
        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});

      IRB.SetInsertPoint(SI);
      auto AS = SI->getPointerAddressSpace();
      StoreInst *PStore = IRB.CreateAlignedStore(
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         StorePartPtrTy, StoreBasePtr->getName() + "."),
      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                 LLVMContext::MD_access_group});

          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                        << ", " << NewSlices.back().endOffset()
                        << "): " << *PStore << "\n");

      if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(LoadBasePtr)) {
        assert(OtherAI != &AI && "We can't re-split our own alloca!");
        ResplitPromotableAllocas.insert(OtherAI);
        Worklist.insert(OtherAI);
      } else if (AllocaInst *OtherAI = dyn_cast<AllocaInst>(
        assert(OtherAI != &AI && "We can't re-split our own alloca!");
        Worklist.insert(OtherAI);

      DeadInsts.push_back(LI);
    DeadInsts.push_back(SI);

    return ResplitPromotableAllocas.count(AI);
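  // Example of presplitting (illustrative, not in the original source): for
  //   %v = load i64, ptr %a
  //   store i64 %v, ptr %b
  // where the slice analysis splits both allocas at byte 4, the load/store
  // pair is rewritten into two i32 load/store pairs at offsets 0 and 4, so
  // that each new alloca partition is covered by whole-partition accesses and
  // can be promoted.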
  Type *SliceTy = nullptr;
  std::pair<Type *, IntegerType *> CommonUseTy =
  if (CommonUseTy.first)
    if (DL.getTypeAllocSize(CommonUseTy.first).getFixedValue() >= P.size()) {
      SliceTy = CommonUseTy.first;
      SliceVecTy = dyn_cast<VectorType>(SliceTy);

                                                 P.beginOffset(), P.size()))
      SliceTy = TypePartitionTy;

  if (!SliceTy && CommonUseTy.second)
    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
      SliceTy = CommonUseTy.second;
      SliceVecTy = dyn_cast<VectorType>(SliceTy);

  if ((!SliceTy || (SliceTy->isArrayTy() &&
      DL.isLegalInteger(P.size() * 8)) {

                                                 P.beginOffset(), P.size())) {
      VectorType *TypePartitionVecTy = dyn_cast<VectorType>(TypePartitionTy);
      if (TypePartitionVecTy &&
        SliceTy = TypePartitionTy;

  assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());

    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,

                    << "[" << P.beginOffset() << "," << P.endOffset()
                    << ") to: " << *NewAI << "\n");

  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;

                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  for (Slice *S : P.splitSliceTails()) {
  for (Slice &S : P) {

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);

      SelectUsers.clear();

        NewSelectsToRewrite;

      std::optional<RewriteableMemOps> Ops =
        SelectUsers.clear();
        NewSelectsToRewrite.clear();
      NewSelectsToRewrite.emplace_back(std::make_pair(Sel, *Ops));

      auto *OldInst = dyn_cast<Instruction>(U->get());
          DeadInsts.push_back(OldInst);

    if (PHIUsers.empty() && SelectUsers.empty()) {
      PromotableAllocas.push_back(NewAI);

      for (PHINode *PHIUser : PHIUsers)
        SpeculatablePHIs.insert(PHIUser);
      SelectsToRewrite.reserve(SelectsToRewrite.size() +
                               NewSelectsToRewrite.size());
                    std::make_move_iterator(NewSelectsToRewrite.begin()),
                    std::make_move_iterator(NewSelectsToRewrite.end())))
        SelectsToRewrite.insert(std::move(KV));
      Worklist.insert(NewAI);

    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();

    Worklist.insert(NewAI);
  unsigned NumPartitions = 0;
  bool Changed = false;

    Changed |= presplitLoadsAndStores(AI, AS);

  bool IsSorted = true;

  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {
      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);

    for (Slice &S : AS) {
      if (!S.isSplittable())

      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();

    for (Slice &S : AS) {
      if (!S.isSplittable())

      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)

      if (isa<LoadInst>(S.getUse()->getUser()) ||
          isa<StoreInst>(S.getUse()->getUser())) {
        S.makeUnsplittable();

  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
        uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
        Fragments.push_back(Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);