#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(NumStoresPredicated,
          "Number of stores rewritten into predicated stores to allow promotion");
STATISTIC(NumVectorized, "Number of vectorized aggregates");

class AllocaSliceRewriter;
class SelectHandSpeculativity {
  unsigned char Storage = 0;

public:
  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
};
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
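
// Illustrative sketch (an assumption, not the actual implementation): the two
// low bits of Storage can be thought of as one flag per select hand, e.g.
//   enum : unsigned char { TrueHandBit = 1 << 0, FalseHandBit = 1 << 1 };
//   setAsSpeculatable(/*isTrueVal=*/true)  ~ Storage |= TrueHandBit;
//   isSpeculatable(/*isTrueVal=*/false)    ~ (Storage & FalseHandBit) != 0;
// The real class encodes this state via llvm::Bitfield; the static_assert
// above only guarantees that the whole state still fits in a single byte.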
using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  SmallSetVector<AllocaInst *, 16> Worklist;
  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>
      PromotableAllocas;
  SmallSetVector<PHINode *, 8> SpeculatablePHIs;
  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

public:
  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  std::pair<bool, bool> runSROA(Function &F);

private:
  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool, bool> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
enum FragCalcResult { UseFrag, UseNoFrag, Skip };
                  uint64_t NewStorageSliceOffsetInBits,
                  std::optional<DIExpression::FragmentInfo> StorageFragment,
                  std::optional<DIExpression::FragmentInfo> CurrentFragment,

  // If the base storage describes part of the variable, apply that offset and
  // size constraint; otherwise the slice itself defines the target fragment.
  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  // If the current fragment is unknown but the variable size is known, the
  // target may cover the whole variable and need no fragment at all.
  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {
      // Treat the current fragment as covering the whole variable.
      CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
      if (Target == CurrentFragment)
        return UseNoFrag;
    }
  }

  if (!CurrentFragment || *CurrentFragment == Target)
    return UseFrag;

  // The target fragment must be contained within the existing fragment.
  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
    return Skip;
  if (DVRAssignMarkerRange.empty())
    return;

  LLVM_DEBUG(dbgs() << "  OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
                    << "\n");
  LLVM_DEBUG(dbgs() << "  SliceSizeInBits: " << SliceSizeInBits << "\n");

  // Record the base fragment of each assign marker attached to the old alloca.
  // ...
      DVR->getExpression()->getFragmentInfo();

  auto MigrateDbgAssign = [&](auto *DbgAssign) {
    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

    // Compute the fragment of the variable covered by this slice.
    std::optional<DIExpression::FragmentInfo> BaseFragment;
    if (R == BaseFragments.end())
      return;
    BaseFragment = R->second;
    std::optional<DIExpression::FragmentInfo> CurrentFragment =
        Expr->getFragmentInfo();
    // ...
        DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
        BaseFragment, CurrentFragment, NewFragment);

    if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
      if (CurrentFragment) {
        // Rewrite NewFragment to be relative to the existing one.
        NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
      }
    }
    // ...
    SetKillLocation = true;
    // ...
    Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);
    // ...
                                   DbgAssign->getDebugLoc())));

    // Values with an argument list or a multi-location expression cannot be
    // carried over; the location is killed instead.
        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());
    // ...
    NewAssign->moveBefore(DbgAssign->getIterator());

    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
  };

  for_each(DVRAssignMarkerRange, MigrateDbgAssign);
  Twine getNameWithPrefix(const Twine &Name) const {
    return Name.isTriviallyEmpty() ? Name : Prefix + Name;
  }

  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

  /// Strict weak ordering: sort by increasing begin offset and, at equal
  /// offsets, order unsplittable slices before splittable ones.
  bool operator<(const Slice &RHS) const {
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;
  }

  /// Support comparison with a single offset to allow binary searches.
  friend LLVM_ATTRIBUTE_UNUSED bool operator<(const Slice &LHS,
                                              uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  }
  friend LLVM_ATTRIBUTE_UNUSED bool operator<(uint64_t LHSOffset,
                                              const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();
  }

  bool operator==(const Slice &RHS) const {
    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
  }
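
  // Example of the ordering (illustrative only): for slices
  //   A = [0,8) unsplittable and B = [0,16) splittable,
  // A sorts before B because, at equal begin offsets, unsplittable slices come
  // first. The partition builder therefore sees the "hard" boundary before any
  // splittable tails that start at the same offset.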
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  bool isEscaped() const { return PointerEscapingInstr; }
  bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

  using range = iterator_range<iterator>;

  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_range = iterator_range<const_iterator>;

  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }

  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
  /// Insert new slices, keeping the slice vector sorted.
  void insert(ArrayRef<Slice> NewSlices) {
    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());
  }

    return DeadUseIfPromotable;
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;
#endif

private:
  template <typename DerivedT, typename RetT = void> class BuilderBase;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  // (debug-only members elided)
#endif

  /// Instructions which will become dead if we rewrite the alloca.
  SmallVector<Instruction *, 8> DeadUsers;
  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;

  /// Offsets of the contiguous range covered by this partition.
  uint64_t BeginOffset = 0, EndOffset = 0;

  /// Partitions are only built by the partition iterator.
  Partition(iterator SI) : SI(SI), SJ(SI) {}

public:
  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;
  }

  /// Returns true when the partition spans only split slice tails and
  /// contains no slices of its own.
  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }
  /// The end iterator of the underlying slice vector.
  AllocaSlices::iterator SE;

  /// The maximum end offset among the current split slice tails.
  uint64_t MaxSplitSliceEndOffset = 0;

  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)

    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    // Clear out any split slice tails which ended in the prior partition.
    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;
      } else {
        // Drop the tails that end within the prior partition ...
        [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
        // ... and assert that the recorded maximum is still accurate:
        //   some tail satisfies S->endOffset() == MaxSplitSliceEndOffset
        //     ("Could not find the current max split slice offset!")
        //   every tail satisfies S->endOffset() <= MaxSplitSliceEndOffset
        //     ("Max split slice end offset is not actually the max!")
      }
    }
    // ...
    assert(P.SplitTails.empty() && "Failed to clear the split slices!");

    // Collect splittable slices from the old partition that extend past it.
      if (S.isSplittable() && S.endOffset() > P.EndOffset) {
        P.SplitTails.push_back(&S);
        MaxSplitSliceEndOffset =
            std::max(S.endOffset(), MaxSplitSliceEndOffset);
      }

    // With only split tails left, the partition spans up to their maximum end.
      P.BeginOffset = P.EndOffset;
      P.EndOffset = MaxSplitSliceEndOffset;

    // If the next slice starts after a gap and is not splittable, form an
    // empty partition covering only the split tails up to that slice.
    if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
        !P.SI->isSplittable()) {
      P.BeginOffset = P.EndOffset;
      P.EndOffset = P.SI->beginOffset();
    }

    // Otherwise consume new slices: begin at the next slice (or at the prior
    // end offset when split tails continue) and extend through its end.
    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {
      // An unsplittable partition always starts at its first slice.
      assert(P.BeginOffset == P.SI->beginOffset());

      // Pull in every slice overlapping this unsplittable one.
      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
      }
    }

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    // Collect all of the overlapping splittable slices.
    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
    }

    // If we stopped early at an unsplittable slice, end the partition there.
    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      assert(!P.SJ->isSplittable());
      P.EndOffset = P.SJ->beginOffset();
    }

  // operator== consistency checks:
           "End iterators don't match between compared partition iterators!");
    if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
      assert(P.SJ == RHS.P.SJ &&
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "
             "slice tails!");
    }

  /// Form a range over the partitions of the slices.
    return make_range(partition_iterator(begin(), end()),
                      partition_iterator(end(), end()));
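
// Illustrative use of the partition range (a sketch, not code from this file):
// a caller walks each disjoint byte range of the alloca in turn, seeing the
// slices fully contained in it and any splittable tails overlapping it.
//
//   for (Partition &P : AS.partitions()) {
//     // [P.beginOffset(), P.endOffset()) never overlaps another partition.
//     for (Slice &S : P)
//       process(S);
//   }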
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  return PN->hasConstantValue();
      AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),
  // ...
    if (VisitedDeadInsts.insert(&I).second)
  // ...
  void insertUse(Instruction &I, const APInt &Offset, uint64_t Size,
                 bool IsSplittable = false) {
    // Completely skip uses which have a zero size or start either before or
    // past the end of the allocation.
    if (Size == 0 || Offset.uge(AllocSize)) {
      LLVM_DEBUG(dbgs() << "WARNING: Ignoring " << Size << " byte use @"
                        << Offset
                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << I << "\n");
      return markAsDead(I);
    }

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    // Clamp uses that run past the end of the alloca.
    assert(AllocSize >= BeginOffset); // Established above.
    if (Size > AllocSize - BeginOffset) {
      LLVM_DEBUG(dbgs() << "WARNING: Clamping a " << Size << " byte use @"
                        << Offset << " to remain within the " << AllocSize
                        << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << I << "\n");
      EndOffset = AllocSize;
    }

    AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
  }

  void visitBitCastInst(BitCastInst &BC) {
    if (BC.use_empty())
      return markAsDead(BC);

    return Base::visitBitCastInst(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    if (ASC.use_empty())
      return markAsDead(ASC);

    return Base::visitAddrSpaceCastInst(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (GEPI.use_empty())
      return markAsDead(GEPI);

    return Base::visitGetElementPtrInst(GEPI);
  }

  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {
  // ...

  void visitLoadInst(LoadInst &LI) {
    assert((!LI.isSimple() || LI.getType()->isSingleValueType()) &&
           "All simple FCA loads should have been pre-split");
    // ...
      return PI.setEscapedReadOnly(&LI);

    TypeSize Size = DL.getTypeStoreSize(LI.getType());
    if (Size.isScalable()) {
      // ...
      return PI.setAborted(&LI);
    }
  }

  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
    if (ValOp == *U)
      return PI.setEscapedAndAborted(&SI);
    if (!IsOffsetKnown)
      return PI.setAborted(&SI);

    TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());
    unsigned VScale = SI.getFunction()->getVScaleValue();
    // ...
      return PI.setAborted(&SI);

    // Stores which start past the end of the allocation are dead.
    // ...
                        << Offset << " which extends past the end of the "
                        << AllocSize << " byte alloca:\n"
                        << "    alloca: " << AS.AI << "\n"
                        << "       use: " << SI << "\n");
      return markAsDead(SI);

    assert((!SI.isSimple() || ValOp->getType()->isSingleValueType()) &&
           "All simple FCA stores should have been pre-split");
  }

  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");
    // Zero-length memsets and ones entirely past the alloca are dead.
        (IsOffsetKnown && Offset.uge(AllocSize)))
      return markAsDead(II);
    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    insertUse(II, Offset,
              Length ? Length->getLimitedValue()
                     : AllocSize - Offset.getLimitedValue(),
              (bool)Length);
  }

  void visitMemTransferInst(MemTransferInst &II) {
    // Zero-length mem transfer intrinsics can be ignored entirely.
      return markAsDead(II);

    // Because we can visit these intrinsics twice arriving from the source or
    // the dest pointer, skip ones we have already marked dead.
    if (VisitedDeadInsts.count(&II))
      return;

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    // If the transfer lies entirely outside the alloca, kill any prior slice
    // recorded for it and mark it dead.
    if (Offset.uge(AllocSize)) {
      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);
    }

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

    // A transfer that copies the alloca onto itself is a no-op unless it is
    // volatile.
    if (*U == II.getRawDest() && *U == II.getRawSource()) {
      if (!II.isVolatile())
        return markAsDead(II);
    }

    // Register this transfer so the slice created for the other operand can be
    // found when we arrive from that side.
    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;
    if (!Inserted) {
      Slice &PrevP = AS.Slices[PrevIdx];

      // A non-volatile self-copy with matching offsets makes both slices dead.
      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
        PrevP.kill();
        return markAsDead(II);
      }

      // Otherwise the transfer is unsplittable on both ends.
      PrevP.makeUnsplittable();
    }

    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");
  }

  void visitIntrinsicInst(IntrinsicInst &II) {
    if (II.isDroppable()) {
      AS.DeadUseIfPromotable.push_back(U);
      return;
    }

    if (!IsOffsetKnown)
      return PI.setAborted(&II);

    if (II.isLifetimeStartOrEnd()) {
      insertUse(II, Offset, AllocSize, true);
      return;
    }

    Base::visitIntrinsicInst(II);
  }

  /// Walk from a PHI or select looking for a use that would prevent
  /// speculation, accumulating the access size into \p Size.
  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    SmallPtrSet<Instruction *, 4> Visited;
    // ...
    do {
      std::tie(UsedI, I) = Uses.pop_back_val();
      // ...
        TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());
      // ...
        TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());
      // ...
        if (!GEP->hasAllZeroIndices())
      // ...
      for (User *U : I->users())
        if (Visited.insert(cast<Instruction>(U)).second)
          Uses.push_back(std::make_pair(I, cast<Instruction>(U)));
    } while (!Uses.empty());
  }

  void visitPHINodeOrSelectInst(Instruction &I) {
    if (I.use_empty())
      return markAsDead(I);

    // A PHI in a block with no insertion point cannot be rewritten.
    if (isa<PHINode>(I) &&
        I.getParent()->getFirstInsertionPt() == I.getParent()->end())
      return PI.setAborted(&I);
    // ...
      AS.DeadOperands.push_back(U);
    // ...
    if (!IsOffsetKnown)
      return PI.setAborted(&I);

    // See if we already have computed info on this node.
    uint64_t &Size = PHIOrSelectSizes[&I];
    if (!Size) {
      // This is a new PHI/select; check for an unsafe use of it.
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);
    }

    // Operands pointing entirely outside the alloca are recorded separately so
    // they can be replaced without killing the whole PHI or select.
    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);
      return;
    }

    insertUse(I, Offset, Size);
  }

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

  /// Disable SROA entirely if there are unhandled users of the alloca.
  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

  void visitCallBase(CallBase &CB) {
    // ...
      PI.setEscapedReadOnly(&CB);
    // ...
    Base::visitCallBase(CB);
  }
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
    :
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      AI(AI),
#endif
      PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
  SliceBuilder PB(DL, AI, *this);
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    // The pointer escaped or analysis was aborted; record the offending
    // instruction and give up on slicing this alloca.
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");
    return;
  }
  PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  OS << "\n";
  printUse(OS, I, Indent);
}

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");
}

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << "  used by: " << *I->getUse()->getUser() << "\n";
}

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << "  A pointer to this alloca escaped by:\n"
       << "  " << *PointerEscapingInstr << "\n";
    return;
  }

  if (PointerEscapingInstrReadOnly)
    OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

  OS << "Slices of alloca: " << AI << "\n";
/// Walk the range of a partitioning looking for a common type to cover this
/// sequence of slices.
static std::pair<Type *, IntegerType *>
findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E,
               uint64_t EndOffset) {
  Type *Ty = nullptr;
  bool TyIsCommon = true;
  IntegerType *ITy = nullptr;

  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
    Use *U = I->getUse();
    // Slices that do not span the full range cannot contribute a common type.
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)
      continue;

    Type *UserTy = nullptr;
    if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser()))
      UserTy = LI->getType();
    else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser()))
      UserTy = SI->getValueOperand()->getType();

    if (IntegerType *UserITy = dyn_cast_or_null<IntegerType>(UserTy)) {
      // Track the largest byte-sized integer type that stays in bounds.
      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))
        continue;

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())
        ITy = UserITy;
    }

    if (!UserTy || (Ty && Ty != UserTy))
      TyIsCommon = false;
    else
      Ty = UserTy;
  }

  return {TyIsCommon ? Ty : nullptr, ITy};
  Type *LoadType = nullptr;
  // All loads reached from the PHI must have the same type.
    if (LoadType != LI->getType())
      return false;
  // ...
    if (BBI->mayWriteToMemory())
      return false;

    MaxAlign = std::max(MaxAlign, LI->getAlign());
  // ...
      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

// speculatePHINodeLoads: rewrite loads of the PHI into loads in each
// predecessor feeding a new PHI of the loaded values.
  IRB.SetInsertPoint(&PN);
                                        PN.getName() + ".sroa.speculated");
  // ...
    IRB.SetInsertPoint(TI);
    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
    if (AATags)
      Load->setAAMetadata(AATags);
    InjectedLoads[Pred] = Load;
SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {
  // ...
}

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {
  // ...
}

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(/*isTrueVal=*/true) &&
         isSpeculatable(/*isTrueVal=*/false);
}

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(/*isTrueVal=*/true) ||
         isSpeculatable(/*isTrueVal=*/false);
}

bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}

static SelectHandSpeculativity
isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG) {
  SelectHandSpeculativity Spec;
  // ...
    Spec.setAsSpeculatable(/*isTrueVal=*/Value == SI.getTrueValue());
std::optional<RewriteableMemOps>
SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
  RewriteableMemOps Ops;

  for (User *U : SI.users()) {
    // Unspeculatable stores on the select are recorded as-is.
      Ops.emplace_back(Store);
    // ...
    PossiblySpeculatableLoad Load(LI);
    // A load that is unconditionally speculatable is recorded plainly.
      Ops.emplace_back(Load);
    // Otherwise record which hands of the select are safe to speculate.
    SelectHandSpeculativity Spec =
        isSafeLoadOfSelectToSpeculate(*LI, SI, PreserveCFG);
    // ...
    Ops.emplace_back(Load);
  }

  return Ops;
}

static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI,
                                     IRBuilderTy &IRB) {
  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

  IRB.SetInsertPoint(&LI);
  LoadInst *TL =
      IRB.CreateAlignedLoad(LI.getType(), TV, LI.getAlign(),
                            LI.getName() + ".sroa.speculate.load.true");
  LoadInst *FL =
      IRB.CreateAlignedLoad(LI.getType(), FV, LI.getAlign(),
                            LI.getName() + ".sroa.speculate.load.false");
  NumLoadsSpeculated += 2;
  // ...
  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                              LI.getName() + ".sroa.speculated");
template <typename T>
static void rewriteMemOpOfSelect(SelectInst &SI, T &I,
                                 SelectHandSpeculativity Spec,
                                 DomTreeUpdater &DTU) {
  // Split the CFG so that the memory operation can be predicated on the
  // select condition.
  if (Spec.areNoneSpeculatable())
    // Neither hand is speculatable: build a full if-then-else diamond.
        SI.getMetadata(LLVMContext::MD_prof), &DTU);
  else {
    // One hand is speculatable: a single conditional block suffices.
        SI.getMetadata(LLVMContext::MD_prof), &DTU,
    if (Spec.isSpeculatable(/*isTrueVal=*/true))
      // ...
  }

  for (BasicBlock *SuccBB : successors(Head)) {
    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    auto &CondMemOp = cast<T>(*I.clone());
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
      if (isa<LoadInst>(I))
        ++NumLoadsPredicated;
      else
        ++NumStoresPredicated;
    } else {
      CondMemOp.dropUBImplyingAttrsAndMetadata();
      ++NumLoadsSpeculated;
    }
    CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
    Value *Ptr = SI.getOperand(1 + SuccIdx);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
    if (isa<LoadInst>(I))
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");
  }
  // ...
  I.replaceAllUsesWith(PN);
}

static void rewriteMemOpOfSelect(SelectInst &SelInst, Instruction &I,
                                 SelectHandSpeculativity Spec,
                                 DomTreeUpdater &DTU) {
  // ...
}

static bool rewriteSelectInstMemOps(SelectInst &SI,
                                    const RewriteableMemOps &Ops,
                                    IRBuilderTy &IRB, DomTreeUpdater *DTU) {
  bool CFGChanged = false;

  for (const RewriteableMemOp &Op : Ops) {
    Instruction *I = nullptr;
    SelectHandSpeculativity Spec;
    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
      I = *US;
    } else {
      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();
    }

    if (Spec.areAllSpeculatable()) {
      speculateSelectInstLoads(SI, cast<LoadInst>(*I), IRB);
    } else {
      assert(DTU && "Should not get here when not allowed to modify the CFG!");
      rewriteMemOpOfSelect(SI, *I, Spec, *DTU);
      CFGChanged = true;
    }
    I->eraseFromParent();
  }
  // ...
  SI.eraseFromParent();
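
// Illustrative IR-level effect (a sketch, not taken verbatim from this file):
// a load whose address is a select over two promotable slots
//
//   %p = select i1 %c, ptr %a, ptr %b
//   %v = load i32, ptr %p
//
// is speculated, when both hands are safe, into
//
//   %v.true  = load i32, ptr %a
//   %v.false = load i32, ptr %b
//   %v       = select i1 %c, i32 %v.true, i32 %v.false
//
// and is otherwise rewritten into explicitly predicated loads/stores on a
// split CFG, which is what the Num*Predicated statistics above count.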
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL,
                             Value *Ptr, APInt Offset, Type *PointerTy,
                             const Twine &NamePrefix) {
  if (Offset != 0)
    Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
                                   NamePrefix + "sroa_idx");
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
                                                 NamePrefix + "sroa_cast");
}

/// Test whether a value can be converted from the old to the new type.
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy,
                            unsigned VScale = 0) {
  if (OldTy == NewTy)
    return true;
  // ...
         "We can't have the same bitwidth for different int types");

  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);
  // ...
  if (NewSize != OldSize)
    return false;

  // Pointers convert only between compatible address spaces.
    return OldAS == NewAS ||
           (!DL.isNonIntegralAddressSpace(OldAS) &&
            !DL.isNonIntegralAddressSpace(NewAS) &&
            DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
  // ...
    return !DL.isNonIntegralPointerType(NewTy);
  // ...
  if (!DL.isNonIntegralPointerType(OldTy))
    return false;

  return true;
}

/// Generic routine to convert an SSA value to a value of a different type.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                           Type *NewTy) {
  Type *OldTy = V->getType();

  assert(canConvertValue(DL, OldTy, NewTy) && "Value not convertable to type");

  if (OldTy == NewTy)
    return V;

  assert(!(isa<IntegerType>(OldTy) && isa<IntegerType>(NewTy)) &&
         "Integer types must be the exact same to convert.");

  // Helper that bitcasts between fixed and scalable vectors of the same size
  // by inserting into or extracting from a scalable vector.
  auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
    Type *InTy = In->getType();
    // ...
      return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
    // ...
      return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
    // ...
    return IRB.CreateBitCast(In, Ty);
  };

  // Pointer <-> integer conversions go through the integer pointer type.
    return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
                              NewTy);
  // ...
    return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                             NewTy);
  // ...
  if (OldAS != NewAS) {
    assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
    return IRB.CreateIntToPtr(
        CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                          DL.getIntPtrType(NewTy)),
        NewTy);
  }

  return CreateBitCastLike(V, NewTy);
  uint64_t BeginOffset =
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||
      BeginIndex >= cast<FixedVectorType>(Ty)->getNumElements())
    return false;
  uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||
      EndIndex > cast<FixedVectorType>(Ty)->getNumElements())
    return false;

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()
                      : FixedVectorType::get(Ty->getElementType(), NumElements);

  Use *U = S.getUse();

  // Memory intrinsics must be non-volatile and splittable; other intrinsics
  // must be lifetime markers or droppable.
    if (MI->isVolatile())
      return false;
    if (!S.isSplittable())
      return false; // Skip any unsplittable intrinsics.
    if (!II->isLifetimeStartOrEnd() && !II->isDroppable())
      return false;

  // Loads and stores must use a compatible, non-struct type; partial slices
  // must be integer accesses.
    if (LTy->isStructTy())
      return false;
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy());
    }
    // ...
    if (SI->isVolatile())
      return false;
    Type *STy = SI->getValueOperand()->getType();
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
    // ...

  // The element size must be a whole number of bytes.
      DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
  if (ElementSize % 8)
    return false;
  assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
         "vector size not a multiple of element size?");

  for (const Slice &S : P)
  // ...
  for (const Slice *S : P.splitSliceTails())
static VectorType *checkVectorTypesForPromotion(
    Partition &P, const DataLayout &DL,
    SmallVectorImpl<VectorType *> &CandidateTys, bool HaveCommonEltTy,
    Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
    VectorType *CommonVecPtrTy, unsigned VScale) {
  // If we didn't find a vector type, nothing to do here.
  if (CandidateTys.empty())
    return nullptr;

  // Pointer-vector candidates with no common pointer vector type cannot be
  // promoted.
  if (HaveVecPtrTy && !HaveCommonVecPtrTy)
    return nullptr;

  if (!HaveCommonEltTy && HaveVecPtrTy) {
    // Use the common pointer vector type directly.
    CandidateTys.clear();
    // ...
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
    // Retry the candidates with their elements recast to same-width integers.
      if (!VTy->getElementType()->isIntegerTy())
        // ... IntegerType::get(
            VTy->getContext(), VTy->getScalarSizeInBits())));

    // Rank the integer candidates; both the comparator and the uniquing
    // predicate assert that only same-sized integer vectors remain:
      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

    llvm::sort(CandidateTys, RankVectorTypesComp);
    CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
                       CandidateTys.end());
  } else {
    // With a common element type, every candidate must be the same type.
      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    CandidateTys.resize(1);
  }
  // ...
      std::numeric_limits<unsigned short>::max();
static VectorType *createAndCheckVectorTypesForPromotion(
    SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy,
    function_ref<void(Type *)> CheckCandidateType, Partition &P,
    const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys,
    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
  [[maybe_unused]] VectorType *OriginalElt =
      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;

  // Consider additional vector types built from the other load/store types
  // whenever their size divides an existing candidate's size.
  for (Type *Ty : OtherTys) {
    // ...
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

    // Iterate over a copy because CheckCandidateType may append to
    // CandidateTys during the loop.
    for (VectorType *const VTy : CandidateTysCopy) {
      // The elements in the copy should remain invariant throughout the loop.
      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
      // ...
        CheckCandidateType(NewVTy);
    }
  }

  return checkVectorTypesForPromotion(
      P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
      HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
}
  Type *CommonEltTy = nullptr;
  VectorType *CommonVecPtrTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      // Reject candidates whose total bit size differs from the existing ones.
      if (!CandidateTys.empty()) {
        VectorType *V = CandidateTys[0];
        if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
            DL.getTypeSizeInBits(V).getFixedValue()) {
          CandidateTys.clear();
          return;
        }
      }
      // ...
      Type *EltTy = VTy->getElementType();

      if (!CommonEltTy)
        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

      if (EltTy->isPointerTy()) {
        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;
      }
    }
  };

  // Collect the load and store types used in this partition.
  for (const Slice &S : P) {
    Type *Ty;
    if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser()))
      Ty = LI->getType();
    else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser()))
      Ty = SI->getValueOperand()->getType();
    else
      continue;

    // Defer vectors of pointers used by partial accesses.
    auto CandTy = Ty->getScalarType();
    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                  S.endOffset() != P.endOffset())) {
      DeferredTys.insert(Ty);
      continue;
    }

    LoadStoreTys.insert(Ty);
    // Consider any loads or stores that are the exact size of the slice.
    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);
  }

  SmallVector<VectorType *> CandidateTysCopy(CandidateTys);
  if (VectorType *VTy = createAndCheckVectorTypesForPromotion(
          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
          HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
    return VTy;

  CandidateTys.clear();
  return createAndCheckVectorTypesForPromotion(
      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
      CommonVecPtrTy, VScale);
static bool isIntegerWideningViableForSlice(const Slice &S,
                                            uint64_t AllocBeginOffset,
                                            Type *AllocaTy,
                                            const DataLayout &DL,
                                            bool &WholeAllocaOp) {
  uint64_t Size = DL.getTypeStoreSize(AllocaTy).getFixedValue();

  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

  // Lifetime markers and droppable intrinsics never block integer widening.
    if (II->isLifetimeStartOrEnd() || II->isDroppable())
      return true;

  if (LoadInst *LI = dyn_cast<LoadInst>(U->getUser())) {
    if (S.beginOffset() < AllocBeginOffset)
      return false;
    // A load covering the whole alloca proves the value is fully initialized;
    // vector loads are excluded because vector widening is preferred.
    if (!isa<VectorType>(LI->getType()) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
    if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) {
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
        return false;
    } else if (RelBegin != 0 || RelEnd != Size ||
               !canConvertValue(DL, AllocaTy, LI->getType())) {
      // Non-integer loads must be convertible from the alloca type.
      return false;
    }
  } else if (StoreInst *SI = dyn_cast<StoreInst>(U->getUser())) {
    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())
      return false;
    TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
    if (S.beginOffset() < AllocBeginOffset)
      return false;
    if (!isa<VectorType>(ValueTy) && RelBegin == 0 && RelEnd == Size)
      WholeAllocaOp = true;
    if (IntegerType *ITy = dyn_cast<IntegerType>(ValueTy)) {
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
        return false;
    } else if (RelBegin != 0 || RelEnd != Size ||
               !canConvertValue(DL, ValueTy, AllocaTy)) {
      // Non-integer stores must be convertible to the alloca type.
      return false;
    }
  } else if (isa<MemIntrinsic>(U->getUser())) {
    if (!S.isSplittable())
      return false; // Skip any unsplittable intrinsics.
  } else {
    return false;
  }

  return true;
}

/// Test whether the given alloca partition's integer operations can be widened
/// to promotable ones.
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy,
                                    const DataLayout &DL) {
  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();
  // Don't try to handle allocas with bit-padding.
  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
    return false;

  // While examining uses, we ensure that the alloca has a covering load or
  // store; if there are only splittable uses, assume the alloca is covered.
  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)
    if (!isIntegerWideningViableForSlice(S, P.beginOffset(), AllocaTy, DL,
                                         WholeAllocaOp))
      return false;

  for (const Slice *S : P.splitSliceTails())
    if (!isIntegerWideningViableForSlice(*S, P.beginOffset(), AllocaTy, DL,
                                         WholeAllocaOp))
      return false;

  return WholeAllocaOp;
}
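
// Example of the whole-alloca requirement (illustrative only): for
//   %a = alloca i64
// with one i64 store covering bytes [0,8) and one i16 load at [2,4), the
// covering store sets WholeAllocaOp, so the partition can be widened to a
// single i64 and the narrow load becomes a shift-and-truncate of that integer.
// With only the narrow i16 access and no covering operation, the function
// returns false and integer widening is not attempted.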
static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V,
                             IntegerType *Ty, uint64_t Offset,
                             const Twine &Name) {
  IntegerType *IntTy = cast<IntegerType>(V->getType());
  assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  if (ShAmt)
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
  if (Ty != IntTy)
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");
  return V;
}

static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old,
                            Value *V, uint64_t Offset, const Twine &Name) {
  IntegerType *IntTy = cast<IntegerType>(Old->getType());
  IntegerType *Ty = cast<IntegerType>(V->getType());
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
  if (Ty != IntTy)
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
  assert(DL.getTypeStoreSize(Ty).getFixedValue() + Offset <=
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  uint64_t ShAmt = 8 * Offset;
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
  if (ShAmt)
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
    V = IRB.CreateOr(Old, V, Name + ".insert");
  }
  return V;
static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex,
                            unsigned EndIndex, const Twine &Name) {
  auto *VecTy = cast<FixedVectorType>(V->getType());
  unsigned NumElements = EndIndex - BeginIndex;

  if (NumElements == VecTy->getNumElements())
    return V;

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
                                 Name + ".extract");
    return V;
  }
  // ...
  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");
  return V;
}

static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V,
                           unsigned BeginIndex, const Twine &Name) {
  VectorType *VecTy = cast<VectorType>(Old->getType());
  assert(VecTy && "Can only insert a vector into a vector");
  // A scalar value is inserted as a single element.
    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
                                Name + ".insert");
  // ...
         "Too many elements!");
  // ...
  assert(V->getType() == VecTy && "Vector type mismatch");
  // ...

  // Widen the smaller vector with poison lanes via a shuffle, then blend it
  // into the old value over [BeginIndex, EndIndex).
    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
    else
      Mask.push_back(-1);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
  // ...
    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
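
// Worked example for the insertion helpers above (illustrative only,
// little-endian): insertInteger of an i8 at byte Offset = 2 into an i32 uses
// ShAmt = 8 * 2 = 16, zero-extends the i8, shifts it left by 16, clears bits
// [16,24) of the old value with the mask, and ORs the two together; on a
// big-endian target ShAmt is instead 8 * (4 - 1 - 2) = 8, counting from the
// most significant end. insertVector follows the same pattern at element
// granularity: the narrow vector is first widened by a shuffle, then blended
// into the old value over the covered lane range [BeginIndex, EndIndex).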
                               const char *DebugName) {
    Type *EltType = VecType->getElementType();
    if (EltType != NewAIEltTy) {
      // Bitcast to a vector of the new alloca's element type, preserving the
      // total bit width.
      unsigned TotalBits =
          VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
      unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
      // ...
      V = Builder.CreateBitCast(V, NewVecType);
      VecType = NewVecType;
      LLVM_DEBUG(dbgs() << "  bitcast " << DebugName << ": " << *V << "\n");
    }
  };

  BitcastIfNeeded(V0, VecType0, "V0");
  BitcastIfNeeded(V1, VecType1, "V1");

  unsigned NumElts0 = VecType0->getNumElements();
  unsigned NumElts1 = VecType1->getNumElements();

  SmallVector<int, 16> ShuffleMask;
  if (NumElts0 == NumElts1) {
    // Same length: a single concatenating shuffle suffices.
    for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
      ShuffleMask.push_back(i);
  } else {
    // Different lengths: first extend the smaller vector to the larger size
    // with padding lanes, then concatenate.
    unsigned SmallSize = std::min(NumElts0, NumElts1);
    unsigned LargeSize = std::max(NumElts0, NumElts1);
    bool IsV0Smaller = NumElts0 < NumElts1;
    Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
    for (unsigned i = 0; i < SmallSize; ++i)
      /* keep lane i of the smaller vector */;
    for (unsigned i = SmallSize; i < LargeSize; ++i)
      /* pad with a poison lane */;
    ExtendedVec = Builder.CreateShuffleVector(
        // ...
    LLVM_DEBUG(dbgs() << "  shufflevector: " << *ExtendedVec << "\n");
    for (unsigned i = 0; i < NumElts0; ++i)
      ShuffleMask.push_back(i);
    for (unsigned i = 0; i < NumElts1; ++i)
      ShuffleMask.push_back(LargeSize + i);
  }

  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
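
// Illustrative example (not taken from this file): merging a <2 x i32> stored
// value with a <4 x i32> stored value first widens the <2 x i32> to four lanes
// with poison padding, then concatenates with the mask {0, 1, 4, 5, 6, 7}
// (lanes of the second operand start at index LargeSize), yielding a <6 x i32>
// that covers both stored ranges in order.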
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  friend class InstVisitor<AllocaSliceRewriter, bool>;

  using Base = InstVisitor<AllocaSliceRewriter, bool>;

  const DataLayout &DL;
  AllocaSlices &AS;
  SROA &Pass;
  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  /// The size of a promotable vector element, in bytes.
  uint64_t ElementSize;

  /// Offsets of the slice currently being rewritten relative to the original
  /// alloca.
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  /// The slice offsets intersected with the new alloca.
  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

  /// PHI and select uses which need speculating or rewriting once all slices
  /// have been processed.
  SmallSetVector<PHINode *, 8> &PHIUsers;
  SmallSetVector<SelectInst *, 8> &SelectUsers;

  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    // ...
    Type *AccessTy = IRB.getPtrTy(AddrSpace);
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
  }

public:
  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallSetVector<PHINode *, 8> &PHIUsers,
                      SmallSetVector<SelectInst *, 8> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
        // IntTy is built from DL.getTypeSizeInBits(NewAI.getAllocatedType())
        // when integer promotion is requested (initializer elided).
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
                          : 0),
        PHIUsers(PHIUsers), SelectUsers(SelectUsers) {
    if (VecTy)
      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
  }

  bool visit(AllocaSlices::const_iterator I) {
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
    IsSplit =
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << "  rewriting " << (IsSplit ? "split " : ""));

    // Compute the intersection of the slice with the new alloca.
    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << "   Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
                      << ")\n");
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();

    Instruction *OldUserI = cast<Instruction>(OldUse->getUser());
    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
                                    Twine(BeginOffset) + ".");
  /// Attempt to rewrite a "tree structured merge": a set of stores that tile
  /// the new alloca exactly, followed by a single full-width load, can be
  /// rewritten into a tree of shufflevectors, one wide store, and a reload.
  /// Returns the instructions made dead by this rewrite, or std::nullopt if
  /// the pattern does not apply.
  std::optional<SmallVector<Value *, 4>>
  rewriteTreeStructuredMerge(Partition &P) {
    // Split slice tails would leave partially covered bytes; give up.
    if (P.splitSliceTails().size() > 0)
      return std::nullopt;

    LoadInst *TheLoad = nullptr;

    struct StoreInfo {
      StoreInst *Store;
      uint64_t BeginOffset;
      uint64_t EndOffset;
      Value *StoredValue;
      StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
          : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
    };
    SmallVector<StoreInfo, 4> StoreInfos;

    Type *AllocatedEltTy =
        // (element type of the new alloca's vector type)

    // Only byte-sized, non-pointer fixed vector element types qualify.
    auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
      auto *FixedVecTy = dyn_cast<FixedVectorType>(Ty);
      return FixedVecTy &&
             DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
             !FixedVecTy->getElementType()->isPointerTy();
    };

    for (Slice &S : P) {
      if (auto *LI = dyn_cast<LoadInst>(S.getUse()->getUser())) {
        // Exactly one full-width, non-volatile load is allowed.
        if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
            S.beginOffset() != NewAllocaBeginOffset ||
            S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
          return std::nullopt;
        TheLoad = LI;
      } else if (auto *SI = dyn_cast<StoreInst>(S.getUse()->getUser())) {
        if (!IsTypeValidForTreeStructuredMerge(
                SI->getValueOperand()->getType()) ||
            SI->isVolatile())
          return std::nullopt;
        StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
                                SI->getValueOperand());
      } else {
        // Any other user defeats the pattern.
        return std::nullopt;
      }
    }

    if (!TheLoad)
      return std::nullopt;

    // A single store would simply be forwarded; require at least two.
    if (StoreInfos.size() < 2)
      return std::nullopt;

    // Sort the stores by offset and verify that they tile the alloca exactly:
    // no gaps and no overlaps.
    llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
      return A.BeginOffset < B.BeginOffset;
    });

    uint64_t ExpectedStart = NewAllocaBeginOffset;
    for (auto &StoreInfo : StoreInfos) {
      uint64_t BeginOff = StoreInfo.BeginOffset;
      uint64_t EndOff = StoreInfo.EndOffset;
      if (BeginOff != ExpectedStart)
        return std::nullopt;
      ExpectedStart = EndOff;
    }
    if (ExpectedStart != NewAllocaEndOffset)
      return std::nullopt;

    // All stores must be in one block and must be executed before the load.
    BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
    for (auto &StoreInfo : StoreInfos) {
      if (StoreInfo.Store->getParent() != StoreBB)
        return std::nullopt;
      if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
        return std::nullopt;
    }

    LLVM_DEBUG({
      dbgs() << "Tree structured merge rewrite:\n  Load: " << *TheLoad
             << "\n  Ordered stores:\n";
      // ...
        dbgs() << "    [" << i << "] Range[" << Info.BeginOffset << ", "
               << Info.EndOffset << ") \tStore: " << *Info.Store
               << "\tValue: " << *Info.StoredValue << "\n";
    });

    // Merge the stored values pairwise, queue style, until one value remains.
    std::queue<Value *> VecElements;
    for (const auto &Info : StoreInfos) {
      // ...
      VecElements.push(Info.StoredValue);
    }

    while (VecElements.size() > 1) {
      const auto NumElts = VecElements.size();
      for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
        Value *V0 = VecElements.front();
        VecElements.pop();
        Value *V1 = VecElements.front();
        VecElements.pop();
        // ...
        VecElements.push(Merged);
      }
      if (NumElts % 2 == 1) {
        // Carry the odd element over to the next round.
        Value *V = VecElements.front();
        VecElements.pop();
        VecElements.push(V);
      }
    }

    // Store the fully merged value and reload it in place of the old load.
    Value *MergedValue = VecElements.front();
    Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
    // ...
                               TheLoad->getName() + ".sroa.new.load"));
    // ...
    return DeletedValues;
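
  // Illustrative IR-level effect (a sketch, not taken from this file): two
  // stores that exactly tile a <8 x i16> alloca followed by one full load,
  //
  //   store <4 x i16> %lo, ptr %a          ; bytes [0,8)
  //   store <4 x i16> %hi, ptr %a.off8     ; bytes [8,16)
  //   %v = load <8 x i16>, ptr %a
  //
  // become a single shufflevector concatenating %lo and %hi, one wide store of
  // the merged value, and a reload that forwards it, after which the original
  // stores and load are all dead and can be deleted.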
  bool visitInstruction(Instruction &I) {
  // ...

  Value *getNewAllocaSlicePtr(IRBuilderTy &IRB, Type *PointerTy) {
    assert(IsSplit || BeginOffset == NewBeginOffset);
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    // Strip accumulated ".sroa." name components so names stay readable.
    StringRef OldName = OldPtr->getName();
    size_t LastSROAPrefix = OldName.rfind(".sroa.");
    if (LastSROAPrefix != StringRef::npos) {
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
      // ...
        OldName = OldName.substr(IndexEnd + 1);
      // ...
        OldName = OldName.substr(OffsetEnd + 1);
    }
    OldName = OldName.substr(0, OldName.find(".sroa_"));
    // ...
                         Twine(OldName) + "."
  }

  Align getSliceAlign() {
    return commonAlignment(NewAI.getAlign(),
                           NewBeginOffset - NewAllocaBeginOffset);
  }

  unsigned getIndex(uint64_t Offset) {
    assert(VecTy && "Can only call getIndex when rewriting a vector");
    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
    uint32_t Index = RelOffset / ElementSize;
    assert(Index * ElementSize == RelOffset);
    return Index;
  }

  void deleteIfTriviallyDead(Value *V) {
    // ...
      Pass.DeadInsts.push_back(I);
  }
  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");

    // Load the whole promoted vector, then extract the covered elements.
    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
  }

  Value *rewriteIntegerLoad(LoadInst &LI) {
    assert(IntTy && "We cannot insert an integer to the alloca");
    // Load the alloca-wide integer and extract the bits this slice covers.
    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
      IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
      // ...
    }
    // ...
           "Can only handle an extract for an overly wide load");
      V = IRB.CreateZExt(V, LI.getType());
    return V;
  }

  bool visitLoadInst(LoadInst &LI) {
    // ...
    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
                             : LI.getType();
    bool IsPtrAdjusted = false;
    Value *V;
    if (VecTy) {
      V = rewriteVectorizedLoadInst(LI);
    } else if (IntTy && LI.getType()->isIntegerTy()) {
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&
               (canConvertValue(DL, NewAllocaTy, TargetTy) ||
                (IsLoadPastEnd &&
                 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
                 NewAllocaTy->isIntegerTy() && TargetTy->isIntegerTy()))) {
      Value *NewPtr =
          getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
      LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
                                              NewAI.getAlign(), LI.isVolatile(),
                                              LI.getName());
      if (LI.isVolatile())
        NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
      if (NewLI->isAtomic())
        NewLI->setAlignment(LI.getAlign());

      // Copy any metadata that is valid for the new load.
      copyMetadataForLoad(*NewLI, LI);

      if (AATags)
        NewLI->setAAMetadata(AATags.adjustForAccess(
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));

      // An integer alloca type narrower than the target type is zero-extended
      // (and shifted on big-endian targets).
      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
          if (AITy->getBitWidth() < TITy->getBitWidth()) {
            V = IRB.CreateZExt(V, TITy, "load.ext");
            if (DL.isBigEndian())
              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
                                "load.shift");
          }
    } else {
      Type *LTy = IRB.getPtrTy(AS);
      LoadInst *NewLI =
          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
                                getSliceAlign(), LI.isVolatile(), LI.getName());
      if (AATags)
        NewLI->setAAMetadata(AATags.adjustForAccess(
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));
      // ...
      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});

      IsPtrAdjusted = true;
    }

    if (IsSplit) {
      // ...
             "Only integer type loads and stores are split");
      assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
             "Split load isn't smaller than original load");
      // ...
             "Non-byte-multiple bit width");
      // Insert the loaded bits into a placeholder that stands in for the
      // eventual reassembled value of the original wide load.
      LIIt.setHeadBit(true);
      IRB.SetInsertPoint(LI.getParent(), LIIt);
      // ...
      Value *Placeholder =
      // ...
      Placeholder->replaceAllUsesWith(&LI);
      Placeholder->deleteValue();
    }

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
                                  AAMDNodes AATags) {
    // If the stored value is narrower than the promoted vector, insert it into
    // the existing value over the covered element range.
    if (V->getType() != VecTy) {
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
      assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
             "Too many elements!");
      Type *SliceTy = (NumElements == 1)
                          ? ElementTy
                          : FixedVectorType::get(ElementTy, NumElements);
      if (V->getType() != SliceTy)
        V = convertValue(DL, IRB, V, SliceTy);
      // ...
    }
    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    // ...
    Pass.DeadInsts.push_back(&SI);

    // Migrate any attached dbg.assign information to the new store.
                     Store, Store->getPointerOperand(), OrigV, DL);
    return true;
  }

  bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
    assert(IntTy && "We cannot extract an integer from the alloca");
    // A store narrower than the alloca-wide integer is merged into the old
    // value with insertInteger.
    if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
        IntTy->getBitWidth()) {
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
      // ...
    }
    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    // ...
                     Store, Store->getPointerOperand(),
                     Store->getValueOperand(), DL);
    Pass.DeadInsts.push_back(&SI);
    return true;
  }

  bool visitStoreInst(StoreInst &SI) {
    Value *OldOp = SI.getOperand(1);
    // ...
    AAMDNodes AATags = SI.getAAMetadata();
    Value *V = SI.getValueOperand();

    // A stored pointer that is itself an alloca may become promotable once
    // this store is rewritten.
    if (V->getType()->isPointerTy())
      if (AllocaInst *AI = dyn_cast<AllocaInst>(V->stripInBoundsOffsets()))
        Pass.PostPromotionWorklist.insert(AI);

    TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
    if (SliceSize < StoreSize) {
      assert(V->getType()->isIntegerTy() &&
             "Only integer type loads and stores are split");
      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
             "Non-byte-multiple bit width");
      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);
      // ...
    }

    if (VecTy)
      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
    if (IntTy && V->getType()->isIntegerTy())
      return rewriteIntegerStore(V, SI, AATags);

    StoreInst *NewSI;
    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&
        canConvertValue(DL, V->getType(), NewAllocaTy)) {
      Value *NewPtr =
          getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
      NewSI =
          IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
    } else {
      unsigned AS = SI.getPointerAddressSpace();
      Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
      NewSI =
          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
    }
    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    // ...
    if (SI.isVolatile())
      NewSI->setAtomic(SI.getOrdering(), SI.getSyncScopeID());
    // ...
    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);
  /// Compute an integer splat of the memset byte wide enough to cover the
  /// given number of bytes.
  Value *getIntegerSplat(Value *V, unsigned Size) {
    assert(Size > 0 && "Expected a positive number of bytes.");
    // ...
                        IRB.CreateZExt(V, SplatIntTy, "zext"),
    // ...
    return V;
  }

  Value *getVectorSplat(Value *V, unsigned NumElements) {
    V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
    return V;
  }

  bool visitMemSetInst(MemSetInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

    // If the memset only partially covers the slice, just retarget it at the
    // adjusted destination pointer and alignment.
      assert(NewBeginOffset == BeginOffset);
      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
      II.setDestAlignment(getSliceAlign());
      // ...
             "AT: Unexpected link to non-const GEP");
      deleteIfTriviallyDead(OldPtr);
      return false;

    // Record this instruction for deletion.
    Pass.DeadInsts.push_back(&II);

    // Decide whether the memset can be rewritten into a plain store of a
    // splat value.
    const bool CanContinue = [&]() {
      // ...
      if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
        return false;
      // ...
      const uint64_t Len = C->getLimitedValue();
      if (Len > std::numeric_limits<unsigned>::max())
        return false;
      auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
      // ...
          DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());
    }();

    // If not, emit a new memset over the adjusted destination.
    if (!CanContinue) {
      Type *SizeTy = II.getLength()->getType();
      unsigned Sz = NewEndOffset - NewBeginOffset;
      // ...
          getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
          MaybeAlign(getSliceAlign()), II.isVolatile()));
      // ...
                       New, New->getRawDest(), nullptr, DL);
      return false;
    }

    Value *V;
    if (VecTy) {
      // Splat to a vector, possibly only over the covered elements.
      assert(ElementTy == ScalarTy);
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
      assert(NumElements <= cast<FixedVectorType>(VecTy)->getNumElements() &&
             "Too many elements!");
      V = getIntegerSplat(
          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
      if (NumElements > 1)
        V = getVectorSplat(V, NumElements);
      // ...
    } else {
      uint64_t Size = NewEndOffset - NewBeginOffset;
      V = getIntegerSplat(II.getValue(), Size);

      if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                    EndOffset != NewAllocaEndOffset)) {
        // Merge the splat into the alloca-wide integer value.
        uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
        // ...
        assert(V->getType() == IntTy &&
               "Wrong type for an alloca wide integer!");
      }
      // ...
      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);

      V = getIntegerSplat(II.getValue(),
                          DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
      // ...
    }

    Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
    StoreInst *New =
        IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                           LLVMContext::MD_access_group});
    // ...
                     New, New->getPointerOperand(), V, DL);
    return !II.isVolatile();
  bool visitMemTransferInst(MemTransferInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

    bool IsDest = &II.getRawDestUse() == OldUse;
    assert((IsDest && II.getRawDest() == OldPtr) ||
           (!IsDest && II.getRawSource() == OldPtr));

    Align SliceAlign = getSliceAlign();
    // Unsplittable transfers are simply retargeted at the adjusted slice
    // pointer; dbg.assign records tracking the destination move with it.
    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      // ...
        if (DbgAssign->getAddress() == II.getDest())
          DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
      if (IsDest) {
        II.setDest(AdjustedPtr);
        II.setDestAlignment(SliceAlign);
      } else {
        II.setSource(AdjustedPtr);
        II.setSourceAlignment(SliceAlign);
      }
      // ...
      deleteIfTriviallyDead(OldPtr);
      return false;
    }

    // Decide whether to emit a memcpy or element-wise loads and stores.
    bool EmitMemCpy =
        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
         /* further structural conditions elided */ false);

    // A copy within the same (old == new) alloca only needs its length
    // adjusted.
    if (EmitMemCpy && &OldAI == &NewAI) {
      assert(NewBeginOffset == BeginOffset);
      if (NewEndOffset != EndOffset)
        II.setLength(NewEndOffset - NewBeginOffset);
      return false;
    }

    Pass.DeadInsts.push_back(&II);

    // The other side of the transfer may itself be a promotable alloca.
    Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
    if (AllocaInst *AI =
            dyn_cast<AllocaInst>(OtherPtr->stripInBoundsOffsets())) {
      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);
    }

    // Compute the offset and alignment of the other pointer.
    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
    Align OtherAlign =
        (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
    OtherAlign =
        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

    if (EmitMemCpy) {
      // Emit the memcpy against the adjusted pointers.
      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      Type *SizeTy = II.getLength()->getType();
      Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

      Value *DestPtr, *SrcPtr;
      MaybeAlign DestAlign, SrcAlign;
      if (IsDest) {
        DestPtr = OurPtr;
        DestAlign = SliceAlign;
        SrcPtr = OtherPtr;
        SrcAlign = OtherAlign;
      } else {
        DestPtr = OtherPtr;
        DestAlign = OtherAlign;
        SrcPtr = OurPtr;
        SrcAlign = SliceAlign;
      }
      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
                                       Size, II.isVolatile());
      if (AATags)
        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
      // ...
                         &II, New, DestPtr, nullptr, DL);
      // ...
                         SliceSize * 8, &II, New, DestPtr, nullptr, DL);
      return false;
    }

    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;
    uint64_t Size = NewEndOffset - NewBeginOffset;
    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;
    IntegerType *SubIntTy =
        IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

    Type *OtherTy;
    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
        OtherTy = VecTy->getElementType();
      // ...
    } else if (IntTy && !IsWholeAlloca) {
      // ...
      OtherTy = NewAllocaTy;
    }

    MaybeAlign SrcAlign = OtherAlign;
    MaybeAlign DstAlign = SliceAlign;
    // ...
      DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
    // ...
      SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());
    // ...

    Value *Src;
    if (VecTy && !IsWholeAlloca && !IsDest) {
      // Load the whole vector and extract the copied elements.
      // ...
    } else if (IntTy && !IsWholeAlloca && !IsDest) {
      // Load the wide integer and extract the copied bits.
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
      // ...
    } else {
      LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                             II.isVolatile(), "copyload");
      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});
      // ...
      Src = Load;
    }

    if (VecTy && !IsWholeAlloca && IsDest) {
      // Insert the copied elements into the existing vector value.
      // ...
    } else if (IntTy && !IsWholeAlloca && IsDest) {
      // Insert the copied bits into the existing wide integer.
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
      // ...
    }

    StoreInst *Store = cast<StoreInst>(
        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    if (AATags)
      Store->setAAMetadata(AATags.adjustForAccess(NewBeginOffset - BeginOffset,
                                                  Src->getType(), DL));
    // ...
                       Store, DstPtr, Src, DL);
    // ...
                       &II, Store, DstPtr, Src, DL);
    return !II.isVolatile();
  bool visitIntrinsicInst(IntrinsicInst &II) {
    assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
           "Unexpected intrinsic!");

    // Record this instruction for deletion.
    Pass.DeadInsts.push_back(&II);

    if (II.isDroppable()) {
      assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");
      // ...
      return true;
    }

    assert(II.getArgOperand(0) == OldPtr);
    // ...
    Instruction *New;
    if (II.getIntrinsicID() == Intrinsic::lifetime_start)
      New = IRB.CreateLifetimeStart(Ptr);
    else
      New = IRB.CreateLifetimeEnd(Ptr);
  /// Walk loads and stores reachable from the given instruction and clamp
  /// their alignment to the slice's alignment.
  void fixLoadStoreAlign(Instruction &Root) {
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<Instruction *, 4> Uses;
    Visited.insert(&Root);
    Uses.push_back(&Root);
    do {
      // ...
        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
      // ...
      for (User *U : I->users())
        if (Visited.insert(cast<Instruction>(U)).second)
          Uses.push_back(cast<Instruction>(U));
    } while (!Uses.empty());
  }

  bool visitPHINode(PHINode &PN) {
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    // The new pointer must be materialized at a valid insertion point, not at
    // the PHI itself.
    IRBuilderBase::InsertPointGuard Guard(IRB);
    if (isa<PHINode>(OldPtr))
      IRB.SetInsertPoint(OldPtr->getParent(),
                         OldPtr->getParent()->getFirstInsertionPt());
    else
      IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    // ...
    deleteIfTriviallyDead(OldPtr);

    // Fix the alignment of any loads or stores using this PHI node.
    fixLoadStoreAlign(PN);
  }

  bool visitSelectInst(SelectInst &SI) {
    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    // Replace whichever operands were using the old pointer.
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(SI);
/// A visitor that rewrites aggregate loads and stores as scalar operations on
/// their components so SROA can analyze them.
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  friend class InstVisitor<AggLoadStoreRewriter, bool>;

  /// Set to prevent cycling through phi nodes and loops.
  SmallPtrSet<User *, 8> Visited;

  const DataLayout &DL;
  IRBuilderTy &IRB;

public:
  AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
      : DL(DL), IRB(IRB) {}

  /// Rewrite loads and stores through a pointer and all pointers derived from
  /// it.
  bool rewrite(Instruction &I) {
    bool Changed = false;
    enqueueUsers(I);
    while (!Queue.empty()) {
      U = Queue.pop_back_val();
      Changed |= visit(cast<Instruction>(U->getUser()));
    }
    return Changed;
  }

private:
  /// Enqueue all the users of the given instruction for further processing.
  void enqueueUsers(Instruction &I) {
    for (Use &U : I.uses())
      if (Visited.insert(U.getUser()).second)
        Queue.push_back(&U);
  }

  // Conservative default: do not rewrite anything.
  bool visitInstruction(Instruction &I) { return false; }

  /// Generic recursive split-emission helper, specialized by LoadOpSplitter
  /// and StoreOpSplitter below.
  template <typename Derived> class OpSplitter {
  protected:
    /// The indices used with insert- or extractvalue to select the value
    /// within the aggregate.
    SmallVector<unsigned, 4> Indices;

    const DataLayout &DL;

    OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
               Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
        : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);
    }

  public:
    /// Descend through arrays and structs, emitting a scalar operation for
    /// each leaf value via the derived class.
    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
      if (Ty->isSingleValueType()) {
        unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
        return static_cast<Derived *>(this)->emitFunc(
            Ty, Agg, commonAlignment(BaseAlign, Offset), Name);
      }

      if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
             ++Idx) {
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));
          // ...
        }
        return;
      }

      if (StructType *STy = dyn_cast<StructType>(Ty)) {
        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
             ++Idx) {
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
          // ...
        }
        return;
      }
    }
  };

  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
    AAMDNodes AATags;
    /// Split components for which separate fake uses must be emitted.
    SmallVector<Value *, 4> Components;
    /// Fake uses of the aggregate load being split.
    SmallVector<Instruction *, 4> FakeUses;

    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                   AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
                   IRBuilderTy &IRB)
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
                                     DL, IRB),
          AATags(AATags) {}

    /// Emit a leaf load of a single value: GEP to the element, load it, and
    /// insert it into the aggregate being rebuilt.
    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
      Value *GEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
      LoadInst *Load =
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

      APInt Offset(
          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
      // ...
        Load->setAAMetadata(
      // ...
      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");
    }

    /// Stash preexisting fake uses of the aggregate load so they can be
    /// re-emitted against the split components.
    void recordFakeUses(LoadInst &LI) {
      for (Use &U : LI.uses())
        if (auto *II = dyn_cast<IntrinsicInst>(U.getUser()))
          if (II->getIntrinsicID() == Intrinsic::fake_use)
            FakeUses.push_back(II);
    }

    /// Replace each fake use of the aggregate with one fake use per split
    /// component.
    void emitFakeUses() {
      for (Instruction *I : FakeUses) {
        IRB.SetInsertPoint(I);
        for (auto *V : Components)
          IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
        I->eraseFromParent();
      }
    }
  };

  bool visitLoadInst(LoadInst &LI) {
    // ...
    Splitter.recordFakeUses(LI);
    // ...
    Splitter.emitFakeUses();
    // ...
  }

  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
                    const DataLayout &DL, IRBuilderTy &IRB)
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
                                      DL, IRB),
          AATags(AATags), AggStore(AggStore) {}
    AAMDNodes AATags;
    StoreInst *AggStore;

    /// Emit a leaf store of a single value: extract it from the aggregate,
    /// GEP to the element, and store it.
    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
      StoreInst *Store =
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

      APInt Offset(
          DL.getIndexSizeInBits(Ptr->getType()->getPointerAddressSpace()), 0);
      // ...
      // Migrate any attached dbg.assign information to the split store.
        uint64_t SizeInBits =
            DL.getTypeSizeInBits(Store->getValueOperand()->getType());
        // ...
                         SizeInBits, AggStore, Store,
                         Store->getPointerOperand(), Store->getValueOperand(),
                         DL);
      // ...
             "AT: unexpected debug.assign linked to store through "
             "unbounded GEP");
    }
  };

  bool visitStoreInst(StoreInst &SI) {
    if (!SI.isSimple() || SI.getPointerOperand() != *U)
      return false;
    Value *V = SI.getValueOperand();
    if (V->getType()->isSingleValueType())
      return false;

    // We have an aggregate being stored; split it apart.
    StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
                             // (alignment argument elided)
                             DL, IRB);
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");
    // ...
    SI.eraseFromParent();
    return true;
  }
4294 bool visitBitCastInst(BitCastInst &BC) {
4299 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
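  // unfoldGEPSelect rewrites a GEP that has a select (or a zext of an i1 index,
  // treated as a select of 1/0) among its operands into a select of two GEPs,
  // roughly: gep (select %c, %a, %b), idx --> select %c, (gep %a, idx), (gep %b, idx),
  // so each arm addresses a single underlying object that SROA can then slice.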
4309 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4328 if (!ZI->getSrcTy()->isIntegerTy(1))
4341      dbgs() << " original: " << *Sel << "\n";
4342      dbgs() << " " << GEPI << "\n";);
4344    auto GetNewOps = [&](Value *SelOp) {
4356      Cond = SI->getCondition();
4357      True = SI->getTrueValue();
4358      False = SI->getFalseValue();
4360      Cond = Sel->getOperand(0);
4361      True = ConstantInt::get(Sel->getType(), 1);
4362      False = ConstantInt::get(Sel->getType(), 0);
4367    IRB.SetInsertPoint(&GEPI);
4371    Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
4372                                 True->getName() + ".sroa.gep", NW);
4375        IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
4376                      False->getName() + ".sroa.gep", NW);
4379        IRB.CreateSelect(Cond, NTrue, NFalse, Sel->getName() + ".sroa.sel");
4380    Visited.erase(&GEPI);
4385    enqueueUsers(*NSelI);
4388      dbgs() << " " << *NFalse << "\n";
4389      dbgs() << " " << *NSel << "\n";);
4398 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
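    // Analogous rewrite for a GEP whose pointer operand is a PHI: when every
    // incoming pointer is a static alloca and the remaining indices are
    // constants, the GEP is replaced by a new PHI of per-incoming-value GEPs so
    // each alloca sees a direct, sliceable use.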
4403    auto IsInvalidPointerOperand = [](Value *V) {
4407      return !AI->isStaticAlloca();
4411    if (any_of(Phi->operands(), IsInvalidPointerOperand))
4426                 [](Value *V) { return isa<ConstantInt>(V); }))
4439      dbgs() << " original: " << *Phi << "\n";
4440      dbgs() << " " << GEPI << "\n";);
4442    auto GetNewOps = [&](Value *PhiOp) {
4452    IRB.SetInsertPoint(Phi);
4453    PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
4454                                    Phi->getName() + ".sroa.phi");
4460    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
4469          IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
4475    Visited.erase(&GEPI);
4479    enqueueUsers(*NewPhi);
4485      dbgs() << "\n " << *NewPhi << '\n');
4490 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4491 if (unfoldGEPSelect(GEPI))
4494 if (unfoldGEPPhi(GEPI))
4501 bool visitPHINode(PHINode &PN) {
4506 bool visitSelectInst(SelectInst &SI) {
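// The fragments that follow are from stripAggregateTypeWrapping and
// getTypePartition: the former peels single-element struct/array wrappers off a
// type while size and layout are unchanged, and the latter tries to find a
// subtype of the alloca's type that covers exactly [Offset, Offset + Size) so a
// partition can keep a meaningful aggregate type instead of falling back to an
// i8 array.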
4520  if (Ty->isSingleValueType())
4523  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
4528    InnerTy = ArrTy->getElementType();
4532    InnerTy = STy->getElementType(Index);
4537  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4538      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
4559  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
4561  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
4562      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
4569    ElementTy = AT->getElementType();
4570    TyNumElements = AT->getNumElements();
4575    ElementTy = VT->getElementType();
4576    TyNumElements = VT->getNumElements();
4578    uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4580    if (NumSkippedElements >= TyNumElements)
4582    Offset -= NumSkippedElements * ElementSize;
4594    if (Size == ElementSize)
4598    if (NumElements * ElementSize != Size)
4622  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
4623  if (Offset >= ElementSize)
4634  if (Size == ElementSize)
4641  if (Index == EndIndex)
4651  assert(Index < EndIndex);
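// presplitLoadsAndStores (below) looks for integer loads and stores whose
// slices straddle a partition boundary and, when a load and the store of its
// value split at the same offsets, pre-splits both into partition-sized pieces
// so every piece lands entirely inside one partition. Illustrative IR (not from
// this file), assuming a single split point at byte 4 of an 8-byte slice:
//   %v = load i64, ptr %p            ; becomes:
//   %v.0 = load i32, ptr %p
//   %p.4 = getelementptr i8, ptr %p, i64 4
//   %v.4 = load i32, ptr %p.4
// with the paired store split the same way.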
4690bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4704  struct SplitOffsets {
4706    std::vector<uint64_t> Splits;
4708  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4721  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4723  LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
4724  for (auto &P : AS.partitions()) {
4725    for (Slice &S : P) {
4727      if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
4732          UnsplittableLoads.insert(LI);
4735          UnsplittableLoads.insert(LI);
4738      assert(P.endOffset() > S.beginOffset() &&
4739             "Empty or backwards partition!");
4748        auto IsLoadSimplyStored = [](LoadInst *LI) {
4749          for (User *LU : LI->users()) {
4751            if (!SI || !SI->isSimple())
4756        if (!IsLoadSimplyStored(LI)) {
4757          UnsplittableLoads.insert(LI);
4763        if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
4767        if (!StoredLoad || !StoredLoad->isSimple())
4769        assert(!SI->isVolatile() && "Cannot split volatile stores!");
4779      auto &Offsets = SplitOffsetsMap[I];
4781             "Should not have splits the first time we see an instruction!");
4783      Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
4788    for (Slice *S : P.splitSliceTails()) {
4789      auto SplitOffsetsMapI =
4791      if (SplitOffsetsMapI == SplitOffsetsMap.end())
4793      auto &Offsets = SplitOffsetsMapI->second;
4797             "Cannot have an empty set of splits on the second partition!");
4799                 P.beginOffset() - Offsets.S->beginOffset() &&
4800             "Previous split does not end where this one begins!");
4804      if (S->endOffset() > P.endOffset())
4813  llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4819    if (UnsplittableLoads.count(LI))
4822    auto LoadOffsetsI = SplitOffsetsMap.find(LI);
4823    if (LoadOffsetsI == SplitOffsetsMap.end())
4825    auto &LoadOffsets = LoadOffsetsI->second;
4828    auto &StoreOffsets = SplitOffsetsMap[SI];
4833    if (LoadOffsets.Splits == StoreOffsets.Splits)
4837               << " " << *LI << "\n"
4838               << " " << *SI << "\n");
4844    UnsplittableLoads.insert(LI);
4853    return UnsplittableLoads.count(LI);
4858    return UnsplittableLoads.count(LI);
4868  IRBuilderTy IRB(&AI);
4875  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
4885  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
4886  std::vector<LoadInst *> SplitLoads;
4887  const DataLayout &DL = AI.getDataLayout();
4888  for (LoadInst *LI : Loads) {
4891    auto &Offsets = SplitOffsetsMap[LI];
4892    unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
4894           "Load must have type size equal to store size");
4896           "Load must be >= slice size");
4898    uint64_t BaseOffset = Offsets.S->beginOffset();
4899    assert(BaseOffset + SliceSize > BaseOffset &&
4900           "Cannot represent alloca access size using 64-bit integers!");
4903    IRB.SetInsertPoint(LI);
4907    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
4910      auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
4913      LoadInst *PLoad = IRB.CreateAlignedLoad(
4916                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
4917                         PartPtrTy, BasePtr->getName() + "."),
4920      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
4921                                LLVMContext::MD_access_group});
4925      SplitLoads.push_back(PLoad);
4929          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
4933                 << ", " << NewSlices.back().endOffset()
4934                 << "): " << *PLoad << "\n");
4941        PartOffset = Offsets.Splits[Idx];
4943        PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;
4949    bool DeferredStores = false;
4950    for (User *LU : LI->users()) {
4952      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
4953        DeferredStores = true;
4959      Value *StoreBasePtr = SI->getPointerOperand();
4960      IRB.SetInsertPoint(SI);
4961      AAMDNodes AATags = SI->getAAMetadata();
4963      LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");
4965      for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
4966        LoadInst *PLoad = SplitLoads[Idx];
4967        uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
4968        auto *PartPtrTy = SI->getPointerOperandType();
4970        auto AS = SI->getPointerAddressSpace();
4971        StoreInst *PStore = IRB.CreateAlignedStore(
4974                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
4975                           PartPtrTy, StoreBasePtr->getName() + "."),
4978        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
4979                                   LLVMContext::MD_access_group,
4980                                   LLVMContext::MD_DIAssignID});
4985        LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
4993          ResplitPromotableAllocas.insert(OtherAI);
4994          Worklist.insert(OtherAI);
4997          Worklist.insert(OtherAI);
5001      DeadInsts.push_back(SI);
5006      SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
5009    DeadInsts.push_back(LI);
5018  for (StoreInst *SI : Stores) {
5023    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
5027           "Slice size should always match load size exactly!");
5028    uint64_t BaseOffset = Offsets.S->beginOffset();
5029    assert(BaseOffset + StoreSize > BaseOffset &&
5030           "Cannot represent alloca access size using 64-bit integers!");
5038    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
5039    std::vector<LoadInst *> *SplitLoads = nullptr;
5040    if (SplitLoadsMapI != SplitLoadsMap.end()) {
5041      SplitLoads = &SplitLoadsMapI->second;
5043             "Too few split loads for the number of splits in the store!");
5048    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
5051      auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
5053      auto *StorePartPtrTy = SI->getPointerOperandType();
5058        PLoad = (*SplitLoads)[Idx];
5060        IRB.SetInsertPoint(LI);
5062        PLoad = IRB.CreateAlignedLoad(
5065                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
5066                           LoadPartPtrTy, LoadBasePtr->getName() + "."),
5069        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5070                                  LLVMContext::MD_access_group});
5074      IRB.SetInsertPoint(SI);
5075      auto AS = SI->getPointerAddressSpace();
5076      StoreInst *PStore = IRB.CreateAlignedStore(
5079                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
5080                         StorePartPtrTy, StoreBasePtr->getName() + "."),
5083      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5084                                 LLVMContext::MD_access_group});
5088          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5092                 << ", " << NewSlices.back().endOffset()
5093                 << "): " << *PStore << "\n");
5103        PartOffset = Offsets.Splits[Idx];
5105        PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
5115        assert(OtherAI != &AI && "We can't re-split our own alloca!");
5116        ResplitPromotableAllocas.insert(OtherAI);
5117        Worklist.insert(OtherAI);
5120        assert(OtherAI != &AI && "We can't re-split our own alloca!");
5121        Worklist.insert(OtherAI);
5136      DeadInsts.push_back(LI);
5138    DeadInsts.push_back(SI);
5147  AS.insert(NewSlices);
5151  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
5157  PromotableAllocas.set_subtract(ResplitPromotableAllocas);
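// rewritePartition (below) chooses a type for one partition of the alloca --
// preferring a common type used by the partition's loads and stores, then a
// subtype found by getTypePartition, then a legal integer of the partition's
// size, and finally an i8 array -- creates a new, smaller alloca of that type,
// and rewrites every slice in the partition onto it via AllocaSliceRewriter.
// If no PHI or select users remain unspeculated, the new alloca is queued for
// mem2reg promotion.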
5172AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
5177  Type *SliceTy = nullptr;
5179  const DataLayout &DL = AI.getDataLayout();
5180  unsigned VScale = AI.getFunction()->getVScaleValue();
5182  std::pair<Type *, IntegerType *> CommonUseTy =
5185  if (CommonUseTy.first) {
5186    TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
5188      SliceTy = CommonUseTy.first;
5195                                               P.beginOffset(), P.size()))
5196      SliceTy = TypePartitionTy;
5199  if (!SliceTy && CommonUseTy.second)
5200    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size()) {
5201      SliceTy = CommonUseTy.second;
5204  if ((!SliceTy || (SliceTy->isArrayTy() &&
5206      DL.isLegalInteger(P.size() * 8))
5207    SliceTy = Type::getIntNTy(*C, P.size() * 8);
5214                                               P.beginOffset(), P.size())) {
5216    if (TypePartitionVecTy &&
5218      SliceTy = TypePartitionTy;
5222    SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
5223  assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
5239  if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
5249    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
5250    NewAI = new AllocaInst(
5251        SliceTy, AI.getAddressSpace(), nullptr,
5252        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
5253        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
5260  LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
5261                    << "," << P.endOffset() << ") to: " << *NewAI << "\n");
5266  unsigned PPWOldSize = PostPromotionWorklist.size();
5267  unsigned NumUses = 0;
5268  SmallSetVector<PHINode *, 8> PHIUsers;
5269  SmallSetVector<SelectInst *, 8> SelectUsers;
5271  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
5272                               P.endOffset(), IsIntegerPromotable, VecTy,
5273                               PHIUsers, SelectUsers);
5274  bool Promotable = true;
5276  if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
5277    NumUses += DeletedValues->size() + 1;
5278    for (Value *V : *DeletedValues)
5279      DeadInsts.push_back(V);
5281    for (Slice *S : P.splitSliceTails()) {
5285    for (Slice &S : P) {
5291  NumAllocaPartitionUses += NumUses;
5292  MaxUsesPerAllocaPartition.updateMax(NumUses);
5296    for (PHINode *PHI : PHIUsers)
5300    SelectUsers.clear();
5305      NewSelectsToRewrite;
5307  for (SelectInst *Sel : SelectUsers) {
5308    std::optional<RewriteableMemOps> Ops =
5313      SelectUsers.clear();
5314      NewSelectsToRewrite.clear();
5321    for (Use *U : AS.getDeadUsesIfPromotable()) {
5323      Value::dropDroppableUse(*U);
5326        DeadInsts.push_back(OldInst);
5328    if (PHIUsers.empty() && SelectUsers.empty()) {
5330      PromotableAllocas.insert(NewAI);
5335      SpeculatablePHIs.insert_range(PHIUsers);
5336      SelectsToRewrite.reserve(SelectsToRewrite.size() +
5337                               NewSelectsToRewrite.size());
5339                       std::make_move_iterator(NewSelectsToRewrite.begin()),
5340                       std::make_move_iterator(NewSelectsToRewrite.end())))
5341        SelectsToRewrite.insert(std::move(KV));
5342      Worklist.insert(NewAI);
5346  while (PostPromotionWorklist.size() > PPWOldSize)
5347    PostPromotionWorklist.pop_back();
5357  Worklist.insert(NewAI);
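// The next fragments are from the debug-info plumbing: createOrReplaceFragment
// adjusts DW_OP_LLVM_extract_bits / DW_OP_LLVM_fragment operators in a
// DIExpression so they describe the new, smaller storage, and insertNewDbgInst
// emits the replacement dbg_declare / dbg_value / dbg_assign records that point
// at the new alloca.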
5404                                              int64_t BitExtractOffset) {
5406  bool HasFragment = false;
5407  bool HasBitExtract = false;
5416      HasBitExtract = true;
5417      int64_t ExtractOffsetInBits = Op.getArg(0);
5418      int64_t ExtractSizeInBits = Op.getArg(1);
5427      assert(BitExtractOffset <= 0);
5428      int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5434      if (AdjustedOffset < 0)
5437      Ops.push_back(Op.getOp());
5438      Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5439      Ops.push_back(ExtractSizeInBits);
5442    Op.appendToVector(Ops);
5447  if (HasFragment && HasBitExtract)
5450  if (!HasBitExtract) {
5469                            std::optional<DIExpression::FragmentInfo> NewFragment,
5470                            int64_t BitExtractAdjustment) {
5480                                                 BitExtractAdjustment);
5481  if (!NewFragmentExpr)
5487    BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5500    BeforeInst->getParent()->insertDbgRecordBefore(DVR,
5506  if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
5514  LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
5520bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5521  if (AS.begin() == AS.end())
5524  unsigned NumPartitions = 0;
5526  const DataLayout &DL = AI.getModule()->getDataLayout();
5529  Changed |= presplitLoadsAndStores(AI, AS);
5537  bool IsSorted = true;
5539  uint64_t AllocaSize =
5540      DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
5541  const uint64_t MaxBitVectorSize = 1024;
5542  if (AllocaSize <= MaxBitVectorSize) {
5545    SmallBitVector SplittableOffset(AllocaSize + 1, true);
5547        for (unsigned O = S.beginOffset() + 1;
5548             O < S.endOffset() && O < AllocaSize; O++)
5549          SplittableOffset.reset(O);
5551    for (Slice &S : AS) {
5552      if (!S.isSplittable())
5555      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5556          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5561      S.makeUnsplittable();
5568    for (Slice &S : AS) {
5569      if (!S.isSplittable())
5572      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5577      S.makeUnsplittable();
5592    Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5598  for (auto &P : AS.partitions()) {
5599    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
5602        uint64_t SizeOfByte = 8;
5603        uint64_t AllocaSize =
5604            DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
5606        uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
5607        Fragments.push_back(
5608            Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5614  NumAllocaPartitions += NumPartitions;
5615  MaxPartitionsPerAlloca.updateMax(NumPartitions);
5619  auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5624    const Value *DbgPtr = DbgVariable->getAddress();
5626        DbgVariable->getFragmentOrEntireVariable();
5629    int64_t CurrentExprOffsetInBytes = 0;
5630    SmallVector<uint64_t> PostOffsetOps;
5632             ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5636    int64_t ExtractOffsetInBits = 0;
5640        ExtractOffsetInBits = Op.getArg(0);
5645    DIBuilder DIB(*AI.getModule(), false);
5646    for (auto Fragment : Fragments) {
5647      int64_t OffsetFromLocationInBits;
5648      std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5653              DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5654              CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5655              NewDbgFragment, OffsetFromLocationInBits))
5661      if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5666      if (!NewDbgFragment)
5667        NewDbgFragment = DbgVariable->getFragment();
5671      int64_t OffestFromNewAllocaInBits =
5672          OffsetFromLocationInBits - ExtractOffsetInBits;
5675      int64_t BitExtractOffset =
5676          std::min<int64_t>(0, OffestFromNewAllocaInBits);
5681      OffestFromNewAllocaInBits =
5682          std::max(int64_t(0), OffestFromNewAllocaInBits);
5688      DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5689      if (OffestFromNewAllocaInBits > 0) {
5690        int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5696      auto RemoveOne = [DbgVariable](auto *OldDII) {
5697        auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
5698          return LHS->getVariable() == RHS->getVariable() &&
5699                 LHS->getDebugLoc()->getInlinedAt() ==
5700                     RHS->getDebugLoc()->getInlinedAt();
5702        if (SameVariableFragment(OldDII, DbgVariable))
5703          OldDII->eraseFromParent();
5708                       NewDbgFragment, BitExtractOffset);
5722void SROA::clobberUse(Use &U) {
5732 DeadInsts.push_back(OldI);
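// propagateStoredValuesToLoads handles allocas whose address escapes only
// through read-only captures: when every slice in a range is a simple load or
// store of one common type, the stored values are forwarded straight to the
// loads via SSAUpdater (BasicLoadAndStorePromoter) without rewriting the alloca
// itself.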
5754bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
5759  LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
5760  bool AllSameAndValid = true;
5761  Type *PartitionType = nullptr;
5763  uint64_t BeginOffset = 0;
5764  uint64_t EndOffset = 0;
5766  auto Flush = [&]() {
5767    if (AllSameAndValid && !Insts.empty()) {
5768      LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
5769                        << EndOffset << ")\n");
5771      SSAUpdater SSA(&NewPHIs);
5773      BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
5774      Promoter.run(Insts);
5776    AllSameAndValid = true;
5777    PartitionType = nullptr;
5781  for (Slice &S : AS) {
5785        dbgs() << "Ignoring slice: ";
5786        AS.print(dbgs(), &S);
5790    if (S.beginOffset() >= EndOffset) {
5792      BeginOffset = S.beginOffset();
5793      EndOffset = S.endOffset();
5794    } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
5795      if (AllSameAndValid) {
5797          dbgs() << "Slice does not match range [" << BeginOffset << ", "
5798                 << EndOffset << ")";
5799          AS.print(dbgs(), &S);
5801      AllSameAndValid = false;
5803      EndOffset = std::max(EndOffset, S.endOffset());
5810      if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
5811        AllSameAndValid = false;
5812      PartitionType = UserTy;
5815      Type *UserTy = SI->getValueOperand()->getType();
5816      if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
5817        AllSameAndValid = false;
5818      PartitionType = UserTy;
5821      AllSameAndValid = false;
5834std::pair<bool, bool>
5835SROA::runOnAlloca(AllocaInst &AI) {
5837  bool CFGChanged = false;
5840  ++NumAllocasAnalyzed;
5843  if (AI.use_empty()) {
5844    AI.eraseFromParent();
5848  const DataLayout &DL = AI.getDataLayout();
5851  auto *AT = AI.getAllocatedType();
5852  TypeSize Size = DL.getTypeAllocSize(AT);
5853  if (AI.isArrayAllocation() || !AT->isSized() || Size.isScalable() ||
5854      Size.getFixedValue() == 0)
5859  IRBuilderTy IRB(&AI);
5860  AggLoadStoreRewriter AggRewriter(DL, IRB);
5861  Changed |= AggRewriter.rewrite(AI);
5864  AllocaSlices AS(DL, AI);
5869  if (AS.isEscapedReadOnly()) {
5870    Changed |= propagateStoredValuesToLoads(AI, AS);
5875  for (Instruction *DeadUser : AS.getDeadUsers()) {
5877    for (Use &DeadOp : DeadUser->operands())
5884    DeadInsts.push_back(DeadUser);
5887  for (Use *DeadOp : AS.getDeadOperands()) {
5888    clobberUse(*DeadOp);
5893  if (AS.begin() == AS.end())
5896  Changed |= splitAlloca(AI, AS);
5899  while (!SpeculatablePHIs.empty())
5903  auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5904  while (!RemainingSelectsToRewrite.empty()) {
5905    const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
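// deleteDeadInstructions drains the DeadInsts list, recursively enqueueing
// operands that become trivially dead, recording any allocas it erases in
// DeletedAllocas so the worklists can be purged, and erasing debug records that
// described the removed allocas.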
5922bool SROA::deleteDeadInstructions(
5923    SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
5925  while (!DeadInsts.empty()) {
5935        DeletedAllocas.insert(AI);
5937        OldDII->eraseFromParent();
5943    for (Use &Operand : I->operands())
5948        DeadInsts.push_back(U);
5952    I->eraseFromParent();
5962bool SROA::promoteAllocas() {
5963 if (PromotableAllocas.empty())
5970 NumPromoted += PromotableAllocas.size();
5971 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
5974 PromotableAllocas.clear();
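// runSROA is the whole-function driver: it seeds the worklist with allocas from
// the entry block, iterates runOnAlloca + deleteDeadInstructions to a fixed
// point, promotes the accumulated promotable allocas with PromoteMemToReg, and
// repeats on the post-promotion worklist. The pass is exposed as "sroa" in the
// new pass manager (e.g. `opt -passes=sroa input.ll -S`); whether the CFG may
// be modified is selected by the preserve-cfg / modify-cfg pass parameters.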
5978std::pair<bool, bool> SROA::runSROA(Function &F) {
5981  const DataLayout &DL = F.getDataLayout();
5986      if (DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
5988        PromotableAllocas.insert(AI);
5990        Worklist.insert(AI);
5995  bool CFGChanged = false;
5998  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6001    while (!Worklist.empty()) {
6002      auto [IterationChanged, IterationCFGChanged] =
6003          runOnAlloca(*Worklist.pop_back_val());
6005      CFGChanged |= IterationCFGChanged;
6007      Changed |= deleteDeadInstructions(DeletedAllocas);
6011      if (!DeletedAllocas.empty()) {
6012        Worklist.set_subtract(DeletedAllocas);
6013        PostPromotionWorklist.set_subtract(DeletedAllocas);
6014        PromotableAllocas.set_subtract(DeletedAllocas);
6015        DeletedAllocas.clear();
6021    Worklist = PostPromotionWorklist;
6022    PostPromotionWorklist.clear();
6023  } while (!Worklist.empty());
6025  assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
6027         "Should not have modified the CFG when told to preserve it.");
6030  for (auto &BB : F) {
6043  SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);
6056 OS, MapClassName2PassName);
6078    if (skipFunction(F))
6081    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6083        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
6090  void getAnalysisUsage(AnalysisUsage &AU) const override {
6097  StringRef getPassName() const override { return "SROA"; }
6102char SROALegacyPass::ID = 0;
6110                    "Scalar Replacement Of Aggregates", false, false)