#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated, "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(NumStoresPredicated,
          "Number of stores rewritten into predicated loads to allow promotion");
STATISTIC(NumVectorized, "Number of vectorized aggregates");
class AllocaSliceRewriter;

class SelectHandSpeculativity {
  unsigned char Storage = 0;

  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
};
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
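// Note: a pair of flags in Storage records, independently, whether the true
// hand and the false hand of a select are safe to speculate (see
// setAsSpeculatable/isSpeculatable taking isTrueVal above); the static_assert
// keeps the whole thing byte-sized so it can travel inside a PointerIntPair
// next to the load it describes.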
using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
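// A RewriteableMemOp is either a load whose per-hand speculatability has been
// computed, or a store that cannot be speculated at all; rewriteSelectInstMemOps
// further down dispatches on this variant with std::get_if<UnspeculatableStore>
// and std::get<PossiblySpeculatableLoad>.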
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  SmallSetVector<AllocaInst *, 16> Worklist;

  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;

  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>
      PromotableAllocas;
  SmallSetVector<PHINode *, 8> SpeculatablePHIs;

  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);
  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}
  std::pair<bool, bool> runSROA(Function &F);

  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  AllocaInst *rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool, bool> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
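// Rough flow suggested by these declarations: runSROA collects allocas into
// Worklist, runOnAlloca builds AllocaSlices and rewrites each partition into
// a new, smaller alloca, deleteDeadInstructions cleans up, and promoteAllocas
// performs mem2reg-style promotion on whatever remains promotable.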
enum FragCalcResult { UseFrag, UseNoFrag, Skip };
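// Informally, from the logic below: UseFrag means "emit the recomputed
// fragment", UseNoFrag means "the slice covers the whole variable, drop the
// fragment", and Skip means "this debug record does not overlap the slice,
// leave it alone".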
    uint64_t NewStorageSliceOffsetInBits,
    std::optional<DIExpression::FragmentInfo> StorageFragment,
    std::optional<DIExpression::FragmentInfo> CurrentFragment,

  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {

  if (Target == CurrentFragment)

  if (!CurrentFragment || *CurrentFragment == Target)

  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
  if (DVRAssignMarkerRange.empty())
    return;

  LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
  LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");

        DVR->getExpression()->getFragmentInfo();

    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

    std::optional<DIExpression::FragmentInfo> BaseFragment;
      if (R == BaseFragments.end())
        return;
      BaseFragment = R->second;

    std::optional<DIExpression::FragmentInfo> CurrentFragment =
        Expr->getFragmentInfo();
        DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
        BaseFragment, CurrentFragment, NewFragment);

    if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
      if (CurrentFragment) {
        NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;

      SetKillLocation = true;

    Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);

                                  DbgAssign->getDebugLoc())));

      NewAssign = DbgAssign;

        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());

    if (NewAssign != DbgAssign) {
      NewAssign->moveBefore(DbgAssign->getIterator());

    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");

  for_each(DVRAssignMarkerRange, MigrateDbgAssign);
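// migrateDebugInfo (excerpted above) re-targets dbg.assign records from the
// original alloca onto the rewritten slice: it recomputes the fragment via
// calculateFragment, roughly kills the value location (SetKillLocation) when
// the old expression cannot be split, and tags the new instruction with a
// fresh DIAssignID before moving the record into place.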
  Twine getNameWithPrefix(const Twine &Name) const {

  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,
  uint64_t BeginOffset = 0;

  uint64_t EndOffset = 0;

  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable,
        Value *ProtectedFieldDisc)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable),
        ProtectedFieldDisc(ProtectedFieldDisc) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

  Value *ProtectedFieldDisc;
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;

  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  }
  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();
  }

    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
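  // Ordering note: when two slices start at the same offset, the unsplittable
  // one sorts first (the comparison returns !isSplittable()), so the partition
  // iterator below always sees a partition's unsplittable uses up front.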
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  bool isEscaped() const { return PointerEscapingInstr; }
  bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

  using range = iterator_range<iterator>;

  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_range = iterator_range<const_iterator>;

  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }

  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());
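    // The freshly appended slices are sorted on their own and then merged with
    // the already-sorted prefix, so the Slices vector stays globally sorted
    // without re-sorting everything.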
    return DeadUseIfPromotable;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;

  template <typename DerivedT, typename RetT = void> class BuilderBase;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  SmallVector<Instruction *, 8> DeadUsers;

  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;
  uint64_t BeginOffset = 0, EndOffset = 0;

  Partition(iterator SI) : SI(SI), SJ(SI) {}

  uint64_t beginOffset() const { return BeginOffset; }

  uint64_t endOffset() const { return EndOffset; }

  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;
  }

  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }
  AllocaSlices::iterator SE;

  uint64_t MaxSplitSliceEndOffset = 0;

  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)

    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;
            [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
              return S->endOffset() == MaxSplitSliceEndOffset;
               "Could not find the current max split slice offset!");
              return S->endOffset() <= MaxSplitSliceEndOffset;
               "Max split slice end offset is not actually the max!");

      assert(P.SplitTails.empty() && "Failed to clear the split slices!");

        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);

      P.BeginOffset = P.EndOffset;
      P.EndOffset = MaxSplitSliceEndOffset;

      if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
          !P.SI->isSplittable()) {
        P.BeginOffset = P.EndOffset;
        P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {
      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      assert(!P.SJ->isSplittable());
      P.EndOffset = P.SJ->beginOffset();

           "End iterators don't match between compared partition iterators!");

    if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
      assert(P.SJ == RHS.P.SJ &&
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "

    return make_range(partition_iterator(begin(), end()),
                      partition_iterator(end(), end()));
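// Typical use of the iterator above (a hypothetical caller; the real callers
// are the partition-rewriting routines later in this file):
//
//   for (Partition &P : AS.partitions()) {
//     // P covers [P.beginOffset(), P.endOffset()) and exposes the slices that
//     // begin inside it plus any splittable tails overlapping it.
//   }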
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  return PN->hasConstantValue();

  Value *ProtectedFieldDisc = nullptr;
        AllocSize(DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue()),

    if (VisitedDeadInsts.insert(&I).second)

                 bool IsSplittable = false) {
                 << " which has zero size or starts outside of the "
                 << AllocSize << " byte alloca:\n"
                 << " alloca: " << AS.AI << "\n"
                 << " use: " << I << "\n");
      return markAsDead(I);

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                 << Offset << " to remain within the " << AllocSize
                 << " byte alloca:\n"
                 << " alloca: " << AS.AI << "\n"
                 << " use: " << I << "\n");
      EndOffset = AllocSize;
    }

    AS.Slices.push_back(
        Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));
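    // Every use is recorded as a half-open [BeginOffset, EndOffset) slice;
    // out-of-bounds accesses are clamped (or dropped entirely) above, so later
    // partitioning can assume slices lie within the alloca.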
  void visitBitCastInst(BitCastInst &BC) {
    if (BC.use_empty())
      return markAsDead(BC);

    return Base::visitBitCastInst(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    if (ASC.use_empty())
      return markAsDead(ASC);

    return Base::visitAddrSpaceCastInst(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (GEPI.use_empty())
      return markAsDead(GEPI);

    return Base::visitGetElementPtrInst(GEPI);
  }

  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {
  void visitLoadInst(LoadInst &LI) {
           "All simple FCA loads should have been pre-split");
      return PI.setEscapedReadOnly(&LI);

    if (Size.isScalable()) {
        return PI.setAborted(&LI);

  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
      return PI.setEscapedAndAborted(&SI);
      return PI.setAborted(&SI);

    TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());

    unsigned VScale = SI.getFunction()->getVScaleValue();
        return PI.setAborted(&SI);

                 << Offset << " which extends past the end of the "
                 << AllocSize << " byte alloca:\n"
                 << " alloca: " << AS.AI << "\n"
                 << " use: " << SI << "\n");
      return markAsDead(SI);

           "All simple FCA stores should have been pre-split");
  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");
        (IsOffsetKnown && Offset.uge(AllocSize)))
      return markAsDead(II);
      return PI.setAborted(&II);
                  : AllocSize - Offset.getLimitedValue(),

  void visitMemTransferInst(MemTransferInst &II) {
      return markAsDead(II);

    if (VisitedDeadInsts.count(&II))
      return PI.setAborted(&II);

    if (Offset.uge(AllocSize)) {
      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);
    }

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

    if (*U == II.getRawDest() && *U == II.getRawSource()) {
      if (!II.isVolatile())
        return markAsDead(II);

    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;
      Slice &PrevP = AS.Slices[PrevIdx];
      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
        return markAsDead(II);

      PrevP.makeUnsplittable();

    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");
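    // A memcpy whose source and destination both point into this alloca is
    // visited twice; MemTransferSliceMap pairs up the two visits so the second
    // one can kill the redundant slice (an exact self-copy) or otherwise mark
    // the earlier slice unsplittable.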
  void visitIntrinsicInst(IntrinsicInst &II) {
    if (II.isDroppable()) {
      AS.DeadUseIfPromotable.push_back(U);

      return PI.setAborted(&II);

    if (II.isLifetimeStartOrEnd()) {
      insertUse(II, Offset, AllocSize, true);

    if (II.getIntrinsicID() == Intrinsic::protected_field_ptr) {
      AS.PFPUsers.push_back(&II);
      ProtectedFieldDisc = II.getArgOperand(1);
      for (Use &U : II.uses()) {
          visitStoreInst(*SI);

      ProtectedFieldDisc = nullptr;

    Base::visitIntrinsicInst(II);
  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    SmallPtrSet<Instruction *, 4> Visited;

      std::tie(UsedI, I) = Uses.pop_back_val();

        TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());

        TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());

        if (!GEP->hasAllZeroIndices())

      for (User *U : I->users())
    } while (!Uses.empty());
  void visitPHINodeOrSelectInst(Instruction &I) {
      return markAsDead(I);

        I.getParent()->getFirstInsertionPt() == I.getParent()->end())
      return PI.setAborted(&I);

        AS.DeadOperands.push_back(U);

      return PI.setAborted(&I);

    uint64_t &Size = PHIOrSelectSizes[&I];
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);

    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

  void visitCallBase(CallBase &CB) {
      PI.setEscapedReadOnly(&CB);

    Base::visitCallBase(CB);
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");

  PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  printUse(OS, I, Indent);
}

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");
}

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
}

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << " A pointer to this alloca escaped by:\n"
       << " " << *PointerEscapingInstr << "\n";

  if (PointerEscapingInstrReadOnly)
    OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

  OS << "Slices of alloca: " << AI << "\n";
static std::pair<Type *, IntegerType *>

  bool TyIsCommon = true;

  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
    Use *U = I->getUse();
    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

    Type *UserTy = nullptr;
      UserTy = SI->getValueOperand()->getType();

      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())

    if (!UserTy || (Ty && Ty != UserTy))

  return {TyIsCommon ? Ty : nullptr, ITy};
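// The pair returned here is (type used in common by every slice in the range,
// if any; widest byte-sized integer type seen), letting callers prefer an
// exact common type and fall back to integer widening otherwise.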
  Type *LoadType = nullptr;
    if (LoadType != LI->getType())
    if (BBI->mayWriteToMemory())
    MaxAlign = std::max(MaxAlign, LI->getAlign());
      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());

  IRB.SetInsertPoint(&PN);
                                    PN.getName() + ".sroa.speculated");

    IRB.SetInsertPoint(TI);
    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
      Load->setAAMetadata(AATags);
    InjectedLoads[Pred] = Load;
SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(true) && isSpeculatable(false);
}

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(true) || isSpeculatable(false);
}

bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}

static SelectHandSpeculativity
  SelectHandSpeculativity Spec;

    Spec.setAsSpeculatable(Value == SI.getTrueValue());

std::optional<RewriteableMemOps>
SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
  RewriteableMemOps Ops;

  for (User *U : SI.users()) {
      Ops.emplace_back(Store);

    PossiblySpeculatableLoad Load(LI);
      Ops.emplace_back(Load);

    SelectHandSpeculativity Spec =
      Ops.emplace_back(Load);
  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

  IRB.SetInsertPoint(&LI);
                                 LI.getName() + ".sroa.speculate.load.true");
                                 LI.getName() + ".sroa.speculate.load.false");
  NumLoadsSpeculated += 2;

  Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                              LI.getName() + ".sroa.speculated",
template <typename T>
                                  SelectHandSpeculativity Spec,

  if (Spec.areNoneSpeculatable())
                              SI.getMetadata(LLVMContext::MD_prof), &DTU);
                              SI.getMetadata(LLVMContext::MD_prof), &DTU,
    if (Spec.isSpeculatable(true))

    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    auto &CondMemOp = cast<T>(*I.clone());
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
        ++NumLoadsPredicated;
        ++NumStoresPredicated;
      CondMemOp.dropUBImplyingAttrsAndMetadata();
      ++NumLoadsSpeculated;

    CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
    Value *Ptr = SI.getOperand(1 + SuccIdx);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");

    I.replaceAllUsesWith(PN);
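// The loop above turns a memory op through `select %c, %p1, %p2` into an
// if/then/else diamond: the op is cloned into the .then and .else blocks with
// the corresponding pointer operand, and (for loads) a PHI in the tail merges
// the two results. Roughly:
//
//   head: br i1 %c, label %then, label %else
//   then: %v.then = load ptr %p1 ...
//   else: %v.else = load ptr %p2 ...
//   tail: %v = phi [ %v.then, %then ], [ %v.else, %else ]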
                                 SelectHandSpeculativity Spec,
                                 const RewriteableMemOps &Ops,
  bool CFGChanged = false;

  for (const RewriteableMemOp &Op : Ops) {
    SelectHandSpeculativity Spec;
    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
    } else {
      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();
    }

    if (Spec.areAllSpeculatable()) {
    } else {
      assert(DTU && "Should not get here when not allowed to modify the CFG!");
    }
    I->eraseFromParent();
  }

  SI.eraseFromParent();
                             const Twine &NamePrefix) {
    Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
                                   NamePrefix + "sroa_idx");
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
                                                 NamePrefix + "sroa_cast");
                            unsigned VScale = 0) {
         "We can't have the same bitwidth for different int types");

  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);

  if (NewSize != OldSize)

      return OldAS == NewAS ||
             (!DL.isNonIntegralAddressSpace(OldAS) &&
              !DL.isNonIntegralAddressSpace(NewAS) &&
              DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

      return !DL.isNonIntegralPointerType(NewTy);

    if (!DL.isNonIntegralPointerType(OldTy))

  Type *OldTy = V->getType();
         "Value not convertable to type");

         "Integer types must be the exact same to convert.");

  auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
    Type *InTy = In->getType();
      return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,
      return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),
    return IRB.CreateBitCast(In, Ty);

    return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),
    return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(
          CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                            DL.getIntPtrType(NewTy)),

  return CreateBitCastLike(V, NewTy);
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||

  uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()

  Use *U = S.getUse();

    if (MI->isVolatile())
    if (!S.isSplittable())

    if (!II->isLifetimeStartOrEnd() && !II->isDroppable())

    if (LTy->isStructTy())
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy());

    if (SI->isVolatile())
    Type *STy = SI->getValueOperand()->getType();
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
                                       bool HaveCommonEltTy, Type *CommonEltTy,
                                       bool HaveVecPtrTy,
                                       bool HaveCommonVecPtrTy,
                                       VectorType *CommonVecPtrTy,
                                       unsigned VScale) {
  if (CandidateTys.empty())

  if (HaveVecPtrTy && !HaveCommonVecPtrTy)

  if (!HaveCommonEltTy && HaveVecPtrTy) {
    CandidateTys.clear();
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
      if (!VTy->getElementType()->isIntegerTy())
          VTy->getContext(), VTy->getScalarSizeInBits())));

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

    llvm::sort(CandidateTys, RankVectorTypesComp);
    CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
                       CandidateTys.end());

      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    CandidateTys.resize(1);

      std::numeric_limits<unsigned short>::max();
        DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

    if (ElementSize % 8)
    assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
           "vector size not a multiple of element size?");

    for (const Slice &S : P)

    for (const Slice *S : P.splitSliceTails())

  return VTy != CandidateTys.end() ? *VTy : nullptr;
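// Candidate selection above: when the slices disagree on the element type, the
// candidates are reduced to same-sized integer-element vectors, ranked, sorted
// and de-duplicated, and each surviving candidate is then checked against every
// slice (and split tail) in the partition before one is returned.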
    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;

  for (Type *Ty : OtherTys) {
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

    for (VectorType *const VTy : CandidateTysCopy) {
      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
        CheckCandidateType(NewVTy);

      P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
      HaveCommonVecPtrTy, CommonVecPtrTy, VScale);

  Type *CommonEltTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
    if (!CandidateTys.empty()) {
      if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
          DL.getTypeSizeInBits(V).getFixedValue()) {
        CandidateTys.clear();

    Type *EltTy = VTy->getElementType();
      CommonEltTy = EltTy;
    else if (CommonEltTy != EltTy)
      HaveCommonEltTy = false;

      HaveVecPtrTy = true;
      if (!CommonVecPtrTy)
        CommonVecPtrTy = VTy;
      else if (CommonVecPtrTy != VTy)
        HaveCommonVecPtrTy = false;

  for (const Slice &S : P) {
      Ty = SI->getValueOperand()->getType();

    auto CandTy = Ty->getScalarType();
    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                  S.endOffset() != P.endOffset())) {

    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);

          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
          HaveCommonVecPtrTy, CommonVecPtrTy, VScale))

  CandidateTys.clear();
      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
      CommonVecPtrTy, VScale);
                                            bool &WholeAllocaOp) {
  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

    if (II->isLifetimeStartOrEnd() || II->isDroppable())

    if (S.beginOffset() < AllocBeginOffset)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())
    TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);
    if (S.beginOffset() < AllocBeginOffset)
      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    if (!S.isSplittable())

  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();

  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())

  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)

  for (const Slice *S : P.splitSliceTails())

  return WholeAllocaOp;
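// Integer widening treats the whole alloca as one iN value: it is only viable
// when the alloca's size is a legal, store-size-exact integer and some slice
// actually covers the full width (WholeAllocaOp), with every other slice
// expressible as a sub-range of that integer.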
                             const Twine &Name) {
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");
  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");
  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
  V = IRB.CreateOr(Old, V, Name + ".insert");
                            unsigned EndIndex, const Twine &Name) {
  unsigned NumElements = EndIndex - BeginIndex;

  if (NumElements == VecTy->getNumElements())

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {
  assert(VecTy && "Can only insert a vector into a vector");
    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
         "Too many elements!");
  assert(V->getType() == VecTy && "Vector type mismatch");
    if (i >= BeginIndex && i < EndIndex)
      Mask.push_back(i - BeginIndex);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");
    Mask2.push_back(IRB.getInt1(i >= BeginIndex && i < EndIndex));
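// extractInteger/insertInteger and extractVector/insertVector are the four
// primitive rewrites used by the slice rewriter below: integers are shifted
// and masked into place (with the shift amount mirrored on big-endian
// targets), while vector slices are carved out or blended back in via
// shufflevector masks over the element range [BeginIndex, EndIndex).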
                              const char *DebugName) {
    Type *EltType = VecType->getElementType();
    if (EltType != NewAIEltTy) {
      unsigned TotalBits =
          VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
      unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);
      V = Builder.CreateBitCast(V, NewVecType);
      VecType = NewVecType;
      LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");

  BitcastIfNeeded(V0, VecType0, "V0");
  BitcastIfNeeded(V1, VecType1, "V1");

  unsigned NumElts0 = VecType0->getNumElements();
  unsigned NumElts1 = VecType1->getNumElements();

  if (NumElts0 == NumElts1) {
    for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
      ShuffleMask.push_back(i);
  } else {
    unsigned SmallSize = std::min(NumElts0, NumElts1);
    unsigned LargeSize = std::max(NumElts0, NumElts1);
    bool IsV0Smaller = NumElts0 < NumElts1;
    Value *&ExtendedVec = IsV0Smaller ? V0 : V1;
    for (unsigned i = 0; i < SmallSize; ++i)
    for (unsigned i = SmallSize; i < LargeSize; ++i)
    ExtendedVec = Builder.CreateShuffleVector(
    LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
    for (unsigned i = 0; i < NumElts0; ++i)
      ShuffleMask.push_back(i);
    for (unsigned i = 0; i < NumElts1; ++i)
      ShuffleMask.push_back(LargeSize + i);
  }

  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
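// The two (bitcast-normalized) vectors are concatenated with a single
// shufflevector; when the element counts differ, the smaller operand is first
// widened by an intermediate shufflevector so both shuffle operands have the
// same length, and the final mask then selects the original lanes of each.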
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  friend class InstVisitor<AllocaSliceRewriter, bool>;

  using Base = InstVisitor<AllocaSliceRewriter, bool>;

  const DataLayout &DL;

  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  uint64_t ElementSize;

  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

  SmallSetVector<PHINode *, 8> &PHIUsers;
  SmallSetVector<SelectInst *, 8> &SelectUsers;

  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    Type *AccessTy = IRB.getPtrTy(AddrSpace);
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);

  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallSetVector<PHINode *, 8> &PHIUsers,
                      SmallSetVector<SelectInst *, 8> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset),
        NewAllocaTy(NewAI.getAllocatedType()),
        DL.getTypeSizeInBits(NewAI.getAllocatedType())
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
    assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
           "Only multiple-of-8 sized vector elements are viable");
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));

  bool visit(AllocaSlices::const_iterator I) {
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();

    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
                                    Twine(BeginOffset) + ".");
  std::optional<SmallVector<Value *, 4>>
  rewriteTreeStructuredMerge(Partition &P) {
    if (P.splitSliceTails().size() > 0)
      return std::nullopt;

    LoadInst *TheLoad = nullptr;

      uint64_t BeginOffset;
      StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
          : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}

    Type *AllocatedEltTy =
    unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);

    auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
      return FixedVecTy &&
             DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
             !FixedVecTy->getElementType()->isPointerTy();

    for (Slice &S : P) {
        if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
            S.beginOffset() != NewAllocaBeginOffset ||
            S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
          return std::nullopt;
        if (!IsTypeValidForTreeStructuredMerge(
                SI->getValueOperand()->getType()) ||
          return std::nullopt;
        unsigned NumElts = VecTy->getNumElements();
        unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
        if (NumElts * EltSize % AllocatedEltTySize != 0)
          return std::nullopt;
        StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
                                SI->getValueOperand());
        return std::nullopt;
        return std::nullopt;

    if (StoreInfos.size() < 2)
      return std::nullopt;

    llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
      return A.BeginOffset < B.BeginOffset;

    uint64_t ExpectedStart = NewAllocaBeginOffset;
    for (auto &StoreInfo : StoreInfos) {
      uint64_t BeginOff = StoreInfo.BeginOffset;
      uint64_t EndOff = StoreInfo.EndOffset;
      if (BeginOff != ExpectedStart)
        return std::nullopt;
      ExpectedStart = EndOff;
    }
    if (ExpectedStart != NewAllocaEndOffset)
      return std::nullopt;

    BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
    for (auto &StoreInfo : StoreInfos) {
      if (StoreInfo.Store->getParent() != StoreBB)
        return std::nullopt;
      if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
        return std::nullopt;
    }

      dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
             << "\n Ordered stores:\n";
        dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
               << Info.EndOffset << ") \tStore: " << *Info.Store
               << "\tValue: " << *Info.StoredValue << "\n";

    std::queue<Value *> VecElements;
    for (const auto &Info : StoreInfos) {
      VecElements.push(Info.StoredValue);
    }

    while (VecElements.size() > 1) {
      const auto NumElts = VecElements.size();
      for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
        Value *V0 = VecElements.front();
        Value *V1 = VecElements.front();
        VecElements.push(Merged);
      }
      if (NumElts % 2 == 1) {
        Value *V = VecElements.front();
        VecElements.push(V);
      }
    }

    Value *MergedValue = VecElements.front();
    Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
                             TheLoad->getName() + ".sroa.new.load"));

    return DeletedValues;
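  // rewriteTreeStructuredMerge recognizes a partition that is written by a
  // series of contiguous, same-block vector stores and read back by a single
  // whole-alloca load: the stored vectors are concatenated pairwise (a
  // balanced tree of shufflevectors via the queue above), the merged value is
  // stored once into the new alloca, and the original load is replaced by a
  // load of that merged value.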
  bool visitInstruction(Instruction &I) {

    assert(IsSplit || BeginOffset == NewBeginOffset);
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    StringRef OldName = OldPtr->getName();
    size_t LastSROAPrefix = OldName.rfind(".sroa.");
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
        OldName = OldName.substr(IndexEnd + 1);
        OldName = OldName.substr(OffsetEnd + 1);
    OldName = OldName.substr(0, OldName.find(".sroa_"));

  Align getSliceAlign() {
                           NewBeginOffset - NewAllocaBeginOffset);

  unsigned getIndex(uint64_t Offset) {
    assert(VecTy && "Can only call getIndex when rewriting a vector");
    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
    uint32_t Index = RelOffset / ElementSize;
    assert(Index * ElementSize == RelOffset);

  void deleteIfTriviallyDead(Value *V) {
      Pass.DeadInsts.push_back(I);
  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");

    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");
  }

  Value *rewriteIntegerLoad(LoadInst &LI) {
    assert(IntTy && "We cannot insert an integer to the alloca");
    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
      IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
             "Can only handle an extract for an overly wide load");
      V = IRB.CreateZExt(V, LI.getType());
  bool visitLoadInst(LoadInst &LI) {
    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)

    bool IsPtrAdjusted = false;
      V = rewriteVectorizedLoadInst(LI);
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&
               DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
          getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
      LoadInst *NewLI = IRB.CreateAlignedLoad(NewAI.getAllocatedType(), NewPtr,
                                              NewAI.getAlign(), LI.isVolatile(),
      if (LI.isVolatile())
        NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
      if (NewLI->isAtomic())
        NewLI->setAlignment(LI.getAlign());

      copyMetadataForLoad(*NewLI, LI);
        NewLI->setAAMetadata(AATags.adjustForAccess(
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));

      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
          if (AITy->getBitWidth() < TITy->getBitWidth()) {
            V = IRB.CreateZExt(V, TITy, "load.ext");
            if (DL.isBigEndian())
              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

      Type *LTy = IRB.getPtrTy(AS);
          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
              NewBeginOffset - BeginOffset, NewLI->getType(), DL));
      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});
      IsPtrAdjusted = true;

           "Only integer type loads and stores are split");
    assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
           "Split load isn't smaller than original load");
           "Non-byte-multiple bit width");

      LIIt.setHeadBit(true);
      IRB.SetInsertPoint(LI.getParent(), LIIt);
      Value *Placeholder =
      Placeholder->replaceAllUsesWith(&LI);
      Placeholder->deleteValue();

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
    if (V->getType() != VecTy) {
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");
      Type *SliceTy = (NumElements == 1)
                          : FixedVectorType::get(ElementTy, NumElements);
      if (V->getType() != SliceTy)

    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    Pass.DeadInsts.push_back(&SI);

                 Store, Store->getPointerOperand(), OrigV, DL);

  bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
    assert(IntTy && "We cannot extract an integer from the alloca");
    if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;
    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
                 Store, Store->getPointerOperand(),
                 Store->getValueOperand(), DL);
    Pass.DeadInsts.push_back(&SI);
  bool visitStoreInst(StoreInst &SI) {
    Value *OldOp = SI.getOperand(1);
    AAMDNodes AATags = SI.getAAMetadata();

    if (V->getType()->isPointerTy())
        Pass.PostPromotionWorklist.insert(AI);

    TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
      assert(V->getType()->isIntegerTy() &&
             "Only integer type loads and stores are split");
      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
             "Non-byte-multiple bit width");
      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);

      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
    if (IntTy && V->getType()->isIntegerTy())
      return rewriteIntegerStore(V, SI, AATags);

    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&
          getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
          IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
    } else {
      unsigned AS = SI.getPointerAddressSpace();
      Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());
    }
    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
    if (SI.isVolatile())

    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);
    assert(Size > 0 && "Expected a positive number of bytes.");
        IRB.CreateZExt(V, SplatIntTy, "zext"),
      V = IRB.CreateVectorSplat(NumElements, V, "vsplat");

  bool visitMemSetInst(MemSetInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

      assert(NewBeginOffset == BeginOffset);
      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
      II.setDestAlignment(getSliceAlign());
             "AT: Unexpected link to non-const GEP");
      deleteIfTriviallyDead(OldPtr);

    Pass.DeadInsts.push_back(&II);

    const bool CanContinue = [&]() {
      if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
      const uint64_t Len = C->getLimitedValue();
      if (Len > std::numeric_limits<unsigned>::max())
      auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
             DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

      Type *SizeTy = II.getLength()->getType();
      unsigned Sz = NewEndOffset - NewBeginOffset;
          getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
          MaybeAlign(getSliceAlign()), II.isVolatile()));
                   New, New->getRawDest(), nullptr, DL);

      assert(ElementTy == ScalarTy);
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");

          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
      if (NumElements > 1)

      uint64_t Size = NewEndOffset - NewBeginOffset;
      V = getIntegerSplat(II.getValue(), Size);
      if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                    EndOffset != NewAllocaEndOffset)) {
        uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
        assert(V->getType() == IntTy &&
               "Wrong type for an alloca wide integer!");
      }

      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);

      V = getIntegerSplat(II.getValue(),
                          DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);

    Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
        IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                           LLVMContext::MD_access_group});
                 New, New->getPointerOperand(), V, DL);

    return !II.isVolatile();
  }
  bool visitMemTransferInst(MemTransferInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

    bool IsDest = &II.getRawDestUse() == OldUse;
    assert((IsDest && II.getRawDest() == OldPtr) ||
           (!IsDest && II.getRawSource() == OldPtr));

    Align SliceAlign = getSliceAlign();
    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
            DbgAssign->getAddress() == II.getDest())
          DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
        II.setDest(AdjustedPtr);
        II.setDestAlignment(SliceAlign);
        II.setSource(AdjustedPtr);
        II.setSourceAlignment(SliceAlign);

      deleteIfTriviallyDead(OldPtr);

        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||

    if (EmitMemCpy && &OldAI == &NewAI) {
      assert(NewBeginOffset == BeginOffset);
      if (NewEndOffset != EndOffset)
        II.setLength(NewEndOffset - NewBeginOffset);

      Pass.DeadInsts.push_back(&II);

    Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
    if (AllocaInst *AI =
      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);

    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
        (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      Type *SizeTy = II.getLength()->getType();
      Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

      Value *DestPtr, *SrcPtr;
      MaybeAlign DestAlign, SrcAlign;
        DestAlign = SliceAlign;
        SrcAlign = OtherAlign;
        DestAlign = OtherAlign;
        SrcAlign = SliceAlign;
      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));
                     &II, New, DestPtr, nullptr, DL);
                     SliceSize * 8, &II, New, DestPtr, nullptr, DL);
    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;
    uint64_t Size = NewEndOffset - NewBeginOffset;
    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;
    IntegerType *SubIntTy =
        IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
        OtherTy = VecTy->getElementType();
    } else if (IntTy && !IsWholeAlloca) {
      OtherTy = NewAllocaTy;

    MaybeAlign SrcAlign = OtherAlign;
    MaybeAlign DstAlign = SliceAlign;

      DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
      SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());

    if (VecTy && !IsWholeAlloca && !IsDest) {
    } else if (IntTy && !IsWholeAlloca && !IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                           II.isVolatile(), "copyload");
    Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});

    if (VecTy && !IsWholeAlloca && IsDest) {
    } else if (IntTy && !IsWholeAlloca && IsDest) {
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
                                        Src->getType(), DL));
                   Store, DstPtr, Src, DL);
                   &II, Store, DstPtr, Src, DL);

    return !II.isVolatile();
  }
  bool visitIntrinsicInst(IntrinsicInst &II) {
    assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
           "Unexpected intrinsic!");

    Pass.DeadInsts.push_back(&II);

    if (II.isDroppable()) {
      assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");

    assert(II.getArgOperand(0) == OldPtr);

    if (II.getIntrinsicID() == Intrinsic::lifetime_start)
      New = IRB.CreateLifetimeStart(Ptr);
    else
      New = IRB.CreateLifetimeEnd(Ptr);

  void fixLoadStoreAlign(Instruction &Root) {
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<Instruction *, 4> Uses;
    Uses.push_back(&Root);
        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));
      for (User *U : I->users())
    } while (!Uses.empty());
  bool visitPHINode(PHINode &PN) {
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    IRBuilderBase::InsertPointGuard Guard(IRB);
                       OldPtr->getParent()->getFirstInsertionPt());
      IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(PN);

  bool visitSelectInst(SelectInst &SI) {
    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(SI);
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  friend class InstVisitor<AggLoadStoreRewriter, bool>;

  SmallPtrSet<User *, 8> Visited;

  const DataLayout &DL;

  AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
      : DL(DL), IRB(IRB) {}

  bool rewrite(Instruction &I) {
    while (!Queue.empty()) {
      U = Queue.pop_back_val();

  void enqueueUsers(Instruction &I) {
    for (Use &U : I.uses())
      if (Visited.insert(U.getUser()).second)
        Queue.push_back(&U);
  }

  bool visitInstruction(Instruction &I) { return false; }
  template <typename Derived> class OpSplitter {
    SmallVector<unsigned, 4> Indices;

    const DataLayout &DL;

    OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
               Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
        : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);
    }

    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
        unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
        return static_cast<Derived *>(this)->emitFunc(

        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

        unsigned OldSize = Indices.size();
        for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
          assert(Indices.size() == OldSize && "Did not return to the old size");
          GEPIndices.push_back(IRB.getInt32(Idx));
          emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                   AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");

        Load->setAAMetadata(
      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

    void recordFakeUses(LoadInst &LI) {
      for (Use &U : LI.uses())
          if (II->getIntrinsicID() == Intrinsic::fake_use)

    void emitFakeUses() {
      for (Instruction *I : FakeUses) {
        IRB.SetInsertPoint(I);
        for (auto *V : Components)
          IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
        I->eraseFromParent();

  bool visitLoadInst(LoadInst &LI) {
    Splitter.recordFakeUses(LI);
    Splitter.emitFakeUses();
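  // OpSplitter::emitSplitOps walks the aggregate type recursively, keeping a
  // stack of value Indices and matching GEP indices, and hands each leaf
  // (single-value) element to the derived emitFunc; LoadOpSplitter's emitFunc
  // materializes a GEP plus scalar load and insertvalue's the result back into
  // the rebuilt aggregate.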
  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
                    const DataLayout &DL, IRBuilderTy &IRB)
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
          AATags(AATags), AggStore(AggStore) {}

    StoreInst *AggStore;

    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

        uint64_t SizeInBits =
            DL.getTypeSizeInBits(Store->getValueOperand()->getType());
                       SizeInBits, AggStore, Store,
                       Store->getPointerOperand(), Store->getValueOperand(),
               "AT: unexpected debug.assign linked to store through "

  bool visitStoreInst(StoreInst &SI) {
    if (!SI.isSimple() || SI.getPointerOperand() != *U)
    if (V->getType()->isSingleValueType())

    StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");

    SI.eraseFromParent();
  bool visitBitCastInst(BitCastInst &BC) {
    // ...
  bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
    // ...
  bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
    // ...
      if (!ZI->getSrcTy()->isIntegerTy(1))
    // ...
    LLVM_DEBUG(dbgs() << " original: " << *Sel << "\n";
               dbgs() << " " << GEPI << "\n";);

    auto GetNewOps = [&](Value *SelOp) {
      // ...
      Cond = SI->getCondition();
      True = SI->getTrueValue();
      False = SI->getFalseValue();
      // ...
      Cond = Sel->getOperand(0);
      True = ConstantInt::get(Sel->getType(), 1);
      False = ConstantInt::get(Sel->getType(), 0);
    // ...
    IRB.SetInsertPoint(&GEPI);
    // ...
    Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
                                 True->getName() + ".sroa.gep", NW);
    // ...
        IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
                      False->getName() + ".sroa.gep", NW);

    Value *NSel = MDFrom
                      ? IRB.CreateSelect(Cond, NTrue, NFalse,
                                         Sel->getName() + ".sroa.sel", MDFrom)
                      : IRB.CreateSelectWithUnknownProfile(
                            Sel->getName() + ".sroa.sel");
    Visited.erase(&GEPI);
    // ...
    enqueueUsers(*NSelI);
    // ...
    LLVM_DEBUG(dbgs() << " " << *NFalse << "\n";
               dbgs() << " " << *NSel << "\n";);
  bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
    // ...
    auto IsInvalidPointerOperand = [](Value *V) {
      // ...
        return !AI->isStaticAlloca();
    // ...
    if (any_of(Phi->operands(), IsInvalidPointerOperand))
      // ...
                [](Value *V) { return isa<ConstantInt>(V); }))
    // ...
    LLVM_DEBUG(dbgs() << " original: " << *Phi << "\n";
               dbgs() << " " << GEPI << "\n";);

    auto GetNewOps = [&](Value *PhiOp) {
      // ...
    IRB.SetInsertPoint(Phi);
    PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
                                    Phi->getName() + ".sroa.phi");
    // ...
    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
      // ...
          IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),
    // ...
    Visited.erase(&GEPI);
    // ...
    enqueueUsers(*NewPhi);
    // ...
    LLVM_DEBUG(dbgs() << "\n " << *NewPhi << '\n');
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (unfoldGEPSelect(GEPI))
      // ...
    if (unfoldGEPPhi(GEPI))
      // ...

  bool visitPHINode(PHINode &PN) {
    // ...
  bool visitSelectInst(SelectInst &SI) {
    // ...
  if (Ty->isSingleValueType())
    // ...
  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
  // ...
    InnerTy = ArrTy->getElementType();
  // ...
    InnerTy = STy->getElementType(Index);
  // ...
  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())
    // ...
  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
    // ...
  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)
    // ...
    ElementTy = AT->getElementType();
    TyNumElements = AT->getNumElements();
  // ...
    ElementTy = VT->getElementType();
    TyNumElements = VT->getNumElements();

  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  // ...
  if (NumSkippedElements >= TyNumElements)
    // ...
  Offset -= NumSkippedElements * ElementSize;
  // ...
  if (Size == ElementSize)
    // ...
  if (NumElements * ElementSize != Size)
    // ...
  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  if (Offset >= ElementSize)
    // ...
  if (Size == ElementSize)
    // ...
  if (Index == EndIndex)
    // ...
  assert(Index < EndIndex);
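// Illustrative aside (not part of SROA.cpp): much of the arithmetic above
// boils down to finding which elements of a fixed-size-element type a byte
// range covers. This simplified, standalone sketch mirrors the array case of
// getTypePartition; it is an assumption-laden stand-in, not the real helper.
#include <cassert>
#include <cstdint>
#include <optional>
#include <utility>

// Return the [Index, EndIndex) element range exactly covering
// [Offset, Offset + Size) in an array of NumElements elements of ElementSize
// bytes each, or nullopt if the range does not fall on element boundaries.
std::optional<std::pair<uint64_t, uint64_t>>
elementRange(uint64_t NumElements, uint64_t ElementSize, uint64_t Offset,
             uint64_t Size) {
  assert(ElementSize > 0 && Size > 0);
  uint64_t NumSkippedElements = Offset / ElementSize;
  if (NumSkippedElements >= NumElements)
    return std::nullopt; // begins past the end of the array
  Offset -= NumSkippedElements * ElementSize;
  if (Offset != 0 || Size % ElementSize != 0)
    return std::nullopt; // not aligned to element boundaries
  uint64_t EndIndex = NumSkippedElements + Size / ElementSize;
  if (EndIndex > NumElements)
    return std::nullopt; // runs off the end of the array
  return std::make_pair(NumSkippedElements, EndIndex);
}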
bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
  // ...
  struct SplitOffsets {
    // ...
    std::vector<uint64_t> Splits;
  };
  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
  // ...
  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

  LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
  for (auto &P : AS.partitions()) {
    for (Slice &S : P) {
      // ...
      if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
        // ...
          UnsplittableLoads.insert(LI);
        // ...
          UnsplittableLoads.insert(LI);
      // ...
      assert(P.endOffset() > S.beginOffset() &&
             "Empty or backwards partition!");
      // ...
        auto IsLoadSimplyStored = [](LoadInst *LI) {
          for (User *LU : LI->users()) {
            // ...
            if (!SI || !SI->isSimple())
        // ...
        if (!IsLoadSimplyStored(LI)) {
          UnsplittableLoads.insert(LI);
        // ...
        if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
          // ...
        if (!StoredLoad || !StoredLoad->isSimple())
          // ...
        assert(!SI->isVolatile() && "Cannot split volatile stores!");
      // ...
      auto &Offsets = SplitOffsetsMap[I];
      // ...
             "Should not have splits the first time we see an instruction!");
      // ...
      Offsets.Splits.push_back(P.endOffset() - S.beginOffset());
    }

    for (Slice *S : P.splitSliceTails()) {
      auto SplitOffsetsMapI =
      // ...
      if (SplitOffsetsMapI == SplitOffsetsMap.end())
        // ...
      auto &Offsets = SplitOffsetsMapI->second;
      // ...
             "Cannot have an empty set of splits on the second partition!");
      // ...
                 P.beginOffset() - Offsets.S->beginOffset() &&
             "Previous split does not end where this one begins!");
      // ...
      if (S->endOffset() > P.endOffset())
  // ...
  llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
    // ...
    if (UnsplittableLoads.count(LI))
      // ...
    auto LoadOffsetsI = SplitOffsetsMap.find(LI);
    if (LoadOffsetsI == SplitOffsetsMap.end())
      // ...
    auto &LoadOffsets = LoadOffsetsI->second;
    // ...
    auto &StoreOffsets = SplitOffsetsMap[SI];
    // ...
    if (LoadOffsets.Splits == StoreOffsets.Splits)
      // ...
               << " " << *LI << "\n"
               << " " << *SI << "\n");
    // ...
    UnsplittableLoads.insert(LI);
  // ...
    return UnsplittableLoads.count(LI);
  // ...
    return UnsplittableLoads.count(LI);
  IRBuilderTy IRB(&AI);
  // ...
  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
  // ...
  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
  std::vector<LoadInst *> SplitLoads;
  const DataLayout &DL = AI.getDataLayout();
  for (LoadInst *LI : Loads) {
    // ...
    auto &Offsets = SplitOffsetsMap[LI];
    unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
    // ...
           "Load must have type size equal to store size");
    // ...
           "Load must be >= slice size");

    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + SliceSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");
    // ...
    IRB.SetInsertPoint(LI);
    // ...
    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    // ...
      auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
      // ...
      LoadInst *PLoad = IRB.CreateAlignedLoad(
          // ...
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         PartPtrTy, BasePtr->getName() + "."),
          // ...
      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});
      // ...
      SplitLoads.push_back(PLoad);
      // ...
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
      // ...
                 << ", " << NewSlices.back().endOffset()
                 << "): " << *PLoad << "\n");
      // ...
      PartOffset = Offsets.Splits[Idx];
      // ...
      PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;

    bool DeferredStores = false;
    for (User *LU : LI->users()) {
      // ...
      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
        DeferredStores = true;
        // ...
      Value *StoreBasePtr = SI->getPointerOperand();
      IRB.SetInsertPoint(SI);
      AAMDNodes AATags = SI->getAAMetadata();

      LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");

      for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
        LoadInst *PLoad = SplitLoads[Idx];
        uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
        auto *PartPtrTy = SI->getPointerOperandType();
        // ...
        auto AS = SI->getPointerAddressSpace();
        StoreInst *PStore = IRB.CreateAlignedStore(
            // ...
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           PartPtrTy, StoreBasePtr->getName() + "."),
            // ...
        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                   LLVMContext::MD_access_group,
                                   LLVMContext::MD_DIAssignID});
        // ...
        LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");
      // ...
          ResplitPromotableAllocas.insert(OtherAI);
          Worklist.insert(OtherAI);
        // ...
          Worklist.insert(OtherAI);
      // ...
      DeadInsts.push_back(SI);
    }
    // ...
      SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));
    // ...
    DeadInsts.push_back(LI);
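// Illustrative aside (not part of SROA.cpp): the PartOffset/PartSize
// bookkeeping used by both splitting loops, extracted into a standalone
// sketch. Given the recorded split points it produces the back-to-back
// (offset, size) pieces that the split loads and stores cover. The helper
// name and container choice are invented for illustration.
#include <cstdint>
#include <utility>
#include <vector>

// Splits must be non-empty, strictly increasing, and less than TotalSize.
std::vector<std::pair<uint64_t, uint64_t>>
partsFromSplits(const std::vector<uint64_t> &Splits, uint64_t TotalSize) {
  std::vector<std::pair<uint64_t, uint64_t>> Parts;
  uint64_t PartOffset = 0, PartSize = Splits.front();
  for (uint64_t Idx = 0, Size = Splits.size(); Idx <= Size; ++Idx) {
    Parts.emplace_back(PartOffset, PartSize); // one part per split gap
    if (Idx == Size)
      break;
    PartOffset = Splits[Idx];
    PartSize = (Idx + 1 < Size ? Splits[Idx + 1] : TotalSize) - PartOffset;
  }
  return Parts; // e.g. Splits {2, 4}, TotalSize 8 -> (0,2), (2,2), (4,4)
}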
  for (StoreInst *SI : Stores) {
    // ...
    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
    // ...
           "Slice size should always match load size exactly!");
    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");
    // ...
    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
      // ...
             "Too few split loads for the number of splits in the store!");
    // ...
    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
    // ...
      auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
      // ...
      auto *StorePartPtrTy = SI->getPointerOperandType();
      // ...
        PLoad = (*SplitLoads)[Idx];
      // ...
        IRB.SetInsertPoint(LI);
        // ...
        PLoad = IRB.CreateAlignedLoad(
            // ...
                           APInt(DL.getIndexSizeInBits(AS), PartOffset),
                           LoadPartPtrTy, LoadBasePtr->getName() + "."),
            // ...
        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});
      // ...
      IRB.SetInsertPoint(SI);
      auto AS = SI->getPointerAddressSpace();
      StoreInst *PStore = IRB.CreateAlignedStore(
          // ...
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         StorePartPtrTy, StoreBasePtr->getName() + "."),
          // ...
      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                 LLVMContext::MD_access_group});
      // ...
          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
      // ...
                 << ", " << NewSlices.back().endOffset()
                 << "): " << *PStore << "\n");
      // ...
        PartOffset = Offsets.Splits[Idx];
        // ...
        PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;
    // ...
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      ResplitPromotableAllocas.insert(OtherAI);
      Worklist.insert(OtherAI);
    // ...
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      Worklist.insert(OtherAI);
    // ...
    DeadInsts.push_back(LI);
    // ...
    DeadInsts.push_back(SI);
  }
  // ...
  AS.insert(NewSlices);
  // ...
  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)
    // ...
  PromotableAllocas.set_subtract(ResplitPromotableAllocas);
AllocaInst *SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS,
  // ...
  Type *SliceTy = nullptr;
  const DataLayout &DL = AI.getDataLayout();
  unsigned VScale = AI.getFunction()->getVScaleValue();
  // ...
  std::pair<Type *, IntegerType *> CommonUseTy =
  // ...
  if (CommonUseTy.first) {
    TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy.first);
    // ...
      SliceTy = CommonUseTy.first;
  // ...
                                                 P.beginOffset(), P.size()))
      SliceTy = TypePartitionTy;
  // ...
  if (!SliceTy && CommonUseTy.second)
    if (DL.getTypeAllocSize(CommonUseTy.second).getFixedValue() >= P.size())
      SliceTy = CommonUseTy.second;
  if ((!SliceTy || (SliceTy->isArrayTy() &&
      // ...
      DL.isLegalInteger(P.size() * 8)) {
    SliceTy = Type::getIntNTy(*C, P.size() * 8);
  // ...
    SliceTy = ArrayType::get(Type::getInt8Ty(*C), P.size());
  assert(DL.getTypeAllocSize(SliceTy).getFixedValue() >= P.size());
  // ...
  if (SliceTy == AI.getAllocatedType() && P.beginOffset() == 0) {
    // ...
    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(SliceTy);
    NewAI = new AllocaInst(
        SliceTy, AI.getAddressSpace(), nullptr,
        IsUnconstrained ? DL.getPrefTypeAlign(SliceTy) : Alignment,
        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),
  // ...
  LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
                    << "," << P.endOffset() << ") to: " << *NewAI << "\n");
  // ...
  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;
  SmallSetVector<PHINode *, 8> PHIUsers;
  SmallSetVector<SelectInst *, 8> SelectUsers;

  AllocaSliceRewriter Rewriter(DL, AS, *this, AI, *NewAI, P.beginOffset(),
                               P.endOffset(), IsIntegerPromotable, VecTy,
                               PHIUsers, SelectUsers);
  bool Promotable = true;
  // ...
  if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
    NumUses += DeletedValues->size() + 1;
    for (Value *V : *DeletedValues)
      DeadInsts.push_back(V);
  // ...
    for (Slice *S : P.splitSliceTails()) {
    // ...
    for (Slice &S : P) {
  // ...
  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);
  // ...
  for (PHINode *PHI : PHIUsers)
    // ...
      SelectUsers.clear();
  // ...
      NewSelectsToRewrite;
  // ...
  for (SelectInst *Sel : SelectUsers) {
    std::optional<RewriteableMemOps> Ops =
    // ...
      SelectUsers.clear();
      NewSelectsToRewrite.clear();
  // ...
  for (Use *U : AS.getDeadUsesIfPromotable()) {
    // ...
    Value::dropDroppableUse(*U);
    // ...
      DeadInsts.push_back(OldInst);
  // ...
  if (PHIUsers.empty() && SelectUsers.empty()) {
    // ...
    PromotableAllocas.insert(NewAI);
  // ...
    SpeculatablePHIs.insert_range(PHIUsers);
    SelectsToRewrite.reserve(SelectsToRewrite.size() +
                             NewSelectsToRewrite.size());
    // ...
             std::make_move_iterator(NewSelectsToRewrite.begin()),
             std::make_move_iterator(NewSelectsToRewrite.end())))
      SelectsToRewrite.insert(std::move(KV));
    Worklist.insert(NewAI);
  // ...
    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();
  // ...
    Worklist.insert(NewAI);
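// Illustrative aside (not part of SROA.cpp): the slice type chosen above falls
// back from a common use type to an integer covering the partition (when that
// width is a legal integer) and finally to a plain byte array. This
// deliberately toy sketch uses strings in place of Type objects and a boolean
// in place of the DataLayout legality query; it only mirrors the decision
// order, not the real interfaces.
#include <cstdint>
#include <optional>
#include <string>

std::string pickSliceType(uint64_t SizeInBytes,
                          std::optional<std::string> CommonUseTy,
                          bool IsLegalInteger) {
  if (CommonUseTy)
    return *CommonUseTy;                               // common use type wins
  if (IsLegalInteger)
    return "i" + std::to_string(SizeInBytes * 8);      // iN over the partition
  return "[" + std::to_string(SizeInBytes) + " x i8]"; // byte-array fallback
}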
                                             int64_t BitExtractOffset) {
  // ...
  bool HasFragment = false;
  bool HasBitExtract = false;
  // ...
      HasBitExtract = true;
      int64_t ExtractOffsetInBits = Op.getArg(0);
      int64_t ExtractSizeInBits = Op.getArg(1);
      // ...
      assert(BitExtractOffset <= 0);
      int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
      // ...
      if (AdjustedOffset < 0)
        // ...
      Ops.push_back(Op.getOp());
      Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
      Ops.push_back(ExtractSizeInBits);
      // ...
    Op.appendToVector(Ops);
  // ...
  if (HasFragment && HasBitExtract)
    // ...
  if (!HasBitExtract) {
  // ...
    std::optional<DIExpression::FragmentInfo> NewFragment,
    int64_t BitExtractAdjustment) {
  // ...
      BitExtractAdjustment);
  if (!NewFragmentExpr)
    // ...
    BeforeInst->getParent()->insertDbgRecordBefore(DVR,
    // ...
    BeforeInst->getParent()->insertDbgRecordBefore(DVR,
    // ...
  if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
    // ...
  LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
  if (AS.begin() == AS.end())
    // ...
  unsigned NumPartitions = 0;
  // ...
  const DataLayout &DL = AI.getModule()->getDataLayout();
  // ...
  Changed |= presplitLoadsAndStores(AI, AS);
  // ...
  bool IsSorted = true;

  uint64_t AllocaSize =
      DL.getTypeAllocSize(AI.getAllocatedType()).getFixedValue();
  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {
    // ...
    SmallBitVector SplittableOffset(AllocaSize + 1, true);
    // ...
      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);
    // ...
    for (Slice &S : AS) {
      if (!S.isSplittable())
        // ...
      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
        // ...
      S.makeUnsplittable();
  // ...
    for (Slice &S : AS) {
      if (!S.isSplittable())
        // ...
      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
        // ...
      S.makeUnsplittable();
  // ...
    Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
  // ...
  for (auto &P : AS.partitions()) {
    if (AllocaInst *NewAI = rewritePartition(AI, AS, P)) {
      // ...
      uint64_t SizeOfByte = 8;
      uint64_t AllocaSize =
          DL.getTypeSizeInBits(NewAI->getAllocatedType()).getFixedValue();
      // ...
      uint64_t Size = std::min(AllocaSize, P.size() * SizeOfByte);
      Fragments.push_back(
          Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
  // ...
  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);
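// Illustrative aside (not part of SROA.cpp): the Fragment records built above
// describe, in bits, which part of the original variable each new alloca now
// holds. A minimal sketch of that arithmetic, with invented names:
#include <algorithm>
#include <cstdint>

struct FragmentBits {
  uint64_t OffsetInBits;
  uint64_t SizeInBits;
};

// Mirrors the Fragment construction above: the fragment starts at the
// partition's byte offset scaled to bits and is capped at the number of bits
// the newly created alloca actually holds.
FragmentBits fragmentForPartition(uint64_t PartitionBeginByte,
                                  uint64_t PartitionSizeBytes,
                                  uint64_t NewAllocaSizeInBits) {
  const uint64_t SizeOfByte = 8;
  uint64_t SizeInBits =
      std::min(NewAllocaSizeInBits, PartitionSizeBytes * SizeOfByte);
  return {PartitionBeginByte * SizeOfByte, SizeInBits};
}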
  auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
    // ...
    const Value *DbgPtr = DbgVariable->getAddress();
    // ...
        DbgVariable->getFragmentOrEntireVariable();
    // ...
    int64_t CurrentExprOffsetInBytes = 0;
    SmallVector<uint64_t> PostOffsetOps;
    // ...
             ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
      // ...
    int64_t ExtractOffsetInBits = 0;
    // ...
        ExtractOffsetInBits = Op.getArg(0);
    // ...
    DIBuilder DIB(*AI.getModule(), false);
    for (auto Fragment : Fragments) {
      int64_t OffsetFromLocationInBits;
      std::optional<DIExpression::FragmentInfo> NewDbgFragment;
      // ...
              DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
              CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
              NewDbgFragment, OffsetFromLocationInBits))
        // ...
      if (NewDbgFragment && !NewDbgFragment->SizeInBits)
        // ...
      if (!NewDbgFragment)
        NewDbgFragment = DbgVariable->getFragment();
      // ...
      int64_t OffestFromNewAllocaInBits =
          OffsetFromLocationInBits - ExtractOffsetInBits;
      // ...
      int64_t BitExtractOffset =
          std::min<int64_t>(0, OffestFromNewAllocaInBits);
      // ...
      OffestFromNewAllocaInBits =
          std::max(int64_t(0), OffestFromNewAllocaInBits);
      // ...
      DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
      if (OffestFromNewAllocaInBits > 0) {
        int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
        // ...
      auto RemoveOne = [DbgVariable](auto *OldDII) {
        auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
          return LHS->getVariable() == RHS->getVariable() &&
                 LHS->getDebugLoc()->getInlinedAt() ==
                     RHS->getDebugLoc()->getInlinedAt();
        // ...
        if (SameVariableFragment(OldDII, DbgVariable))
          OldDII->eraseFromParent();
      // ...
                      NewDbgFragment, BitExtractOffset);
void SROA::clobberUse(Use &U) {
  // ...
    DeadInsts.push_back(OldI);
bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
  // ...
  LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
  bool AllSameAndValid = true;
  Type *PartitionType = nullptr;
  // ...
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;
  // ...
  auto Flush = [&]() {
    if (AllSameAndValid && !Insts.empty()) {
      LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
                        << EndOffset << ")\n");
      // ...
      SSAUpdater SSA(&NewPHIs);
      // ...
      BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
      Promoter.run(Insts);
    }
    AllSameAndValid = true;
    PartitionType = nullptr;
  // ...
  for (Slice &S : AS) {
    // ...
      dbgs() << "Ignoring slice: ";
      AS.print(dbgs(), &S);
    // ...
    if (S.beginOffset() >= EndOffset) {
      // ...
      BeginOffset = S.beginOffset();
      EndOffset = S.endOffset();
    } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
      if (AllSameAndValid) {
        // ...
          dbgs() << "Slice does not match range [" << BeginOffset << ", "
                 << EndOffset << ")";
          AS.print(dbgs(), &S);
        // ...
        AllSameAndValid = false;
      }
      EndOffset = std::max(EndOffset, S.endOffset());
    // ...
      if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
    // ...
      Type *UserTy = SI->getValueOperand()->getType();
      if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
    // ...
      AllSameAndValid = false;
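// Illustrative aside (not part of SROA.cpp): the grouping performed by
// propagateStoredValuesToLoads reads as "walk slices in offset order, keep a
// run alive only while every slice covers exactly the same range, and flush a
// run when a disjoint slice begins". This standalone sketch keeps only the
// range bookkeeping; the real Flush hands the run to an SSAUpdater-based
// promoter instead of printing.
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

struct SliceRange {
  uint64_t Begin, End;
};

void groupRuns(const std::vector<SliceRange> &Slices) {
  bool AllSame = true;
  uint64_t Begin = 0, End = 0;
  auto Flush = [&] {
    if (AllSame && End > Begin)
      std::cout << "flush run [" << Begin << ", " << End << ")\n";
    AllSame = true;
  };
  for (const SliceRange &S : Slices) { // assumed sorted by Begin
    if (S.Begin >= End) {              // a new, disjoint run starts here
      Flush();
      Begin = S.Begin;
      End = S.End;
    } else if (S.Begin != Begin || S.End != End) { // overlap but not identical
      AllSame = false;
      End = std::max(End, S.End);
    }
  }
  Flush();
}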
5881std::pair<
bool ,
bool >
5882SROA::runOnAlloca(AllocaInst &AI) {
5884 bool CFGChanged =
false;
5887 ++NumAllocasAnalyzed;
5890 if (AI.use_empty()) {
5891 AI.eraseFromParent();
5895 const DataLayout &
DL = AI.getDataLayout();
5898 auto *AT = AI.getAllocatedType();
5899 TypeSize
Size =
DL.getTypeAllocSize(AT);
5900 if (AI.isArrayAllocation() || !AT->isSized() ||
Size.isScalable() ||
5901 Size.getFixedValue() == 0)
5906 IRBuilderTy IRB(&AI);
5907 AggLoadStoreRewriter AggRewriter(
DL, IRB);
5908 Changed |= AggRewriter.rewrite(AI);
5911 AllocaSlices AS(
DL, AI);
5916 if (AS.isEscapedReadOnly()) {
5917 Changed |= propagateStoredValuesToLoads(AI, AS);
5921 for (
auto &
P : AS.partitions()) {
5927 std::optional<Value *> ProtectedFieldDisc;
5928 auto SliceHasMismatch = [&](Slice &S) {
5930 if (
II->getIntrinsicID() == Intrinsic::lifetime_start ||
5931 II->getIntrinsicID() == Intrinsic::lifetime_end)
5933 if (!ProtectedFieldDisc)
5934 ProtectedFieldDisc = S.ProtectedFieldDisc;
5935 return *ProtectedFieldDisc != S.ProtectedFieldDisc;
5938 if (SliceHasMismatch(S))
5940 for (Slice *S :
P.splitSliceTails())
5941 if (SliceHasMismatch(*S))
5946 for (Instruction *DeadUser : AS.getDeadUsers()) {
5948 for (Use &DeadOp : DeadUser->operands())
5955 DeadInsts.push_back(DeadUser);
5958 for (Use *DeadOp : AS.getDeadOperands()) {
5959 clobberUse(*DeadOp);
5962 for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {
5963 PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));
5965 DeadInsts.push_back(PFPUser);
5970 if (AS.begin() == AS.end())
5973 Changed |= splitAlloca(AI, AS);
5976 while (!SpeculatablePHIs.empty())
5980 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
5981 while (!RemainingSelectsToRewrite.empty()) {
5982 const auto [
K,
V] = RemainingSelectsToRewrite.pop_back_val();
bool SROA::deleteDeadInstructions(
    SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
  // ...
  while (!DeadInsts.empty()) {
    // ...
      DeletedAllocas.insert(AI);
      // ...
        OldDII->eraseFromParent();
    // ...
    for (Use &Operand : I->operands())
      // ...
        DeadInsts.push_back(U);
    // ...
    I->eraseFromParent();
  // ...
bool SROA::promoteAllocas() {
  if (PromotableAllocas.empty())
    // ...
  NumPromoted += PromotableAllocas.size();
  PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
  // ...
  PromotableAllocas.clear();
6055std::pair<
bool ,
bool > SROA::runSROA(Function &
F) {
6058 const DataLayout &
DL =
F.getDataLayout();
6063 if (
DL.getTypeAllocSize(AI->getAllocatedType()).isScalable() &&
6065 PromotableAllocas.insert(AI);
6067 Worklist.insert(AI);
6072 bool CFGChanged =
false;
6075 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6078 while (!Worklist.empty()) {
6079 auto [IterationChanged, IterationCFGChanged] =
6080 runOnAlloca(*Worklist.pop_back_val());
6082 CFGChanged |= IterationCFGChanged;
6084 Changed |= deleteDeadInstructions(DeletedAllocas);
6088 if (!DeletedAllocas.
empty()) {
6089 Worklist.set_subtract(DeletedAllocas);
6090 PostPromotionWorklist.set_subtract(DeletedAllocas);
6091 PromotableAllocas.set_subtract(DeletedAllocas);
6092 DeletedAllocas.
clear();
6098 Worklist = PostPromotionWorklist;
6099 PostPromotionWorklist.clear();
6100 }
while (!Worklist.empty());
6102 assert((!CFGChanged ||
Changed) &&
"Can not only modify the CFG.");
6104 "Should not have modified the CFG when told to preserve it.");
6107 for (
auto &BB :
F) {
6120 SROA(&
F.getContext(), &DTU, &AC, PreserveCFG).runSROA(
F);
6133 OS, MapClassName2PassName);
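// Usage note (not part of SROA.cpp): the new-pass-manager entry point above is
// normally scheduled from a pass pipeline. A minimal sketch, assuming the
// usual SROA.h header and an existing FunctionPassManager; PreserveCFG here
// matches the mode the asserts above check, while SROAOptions::ModifyCFG would
// allow select/PHI speculation to split blocks.
#include "llvm/IR/PassManager.h"
#include "llvm/Transforms/Scalar/SROA.h"

void addSROAToPipeline(llvm::FunctionPassManager &FPM) {
  FPM.addPass(llvm::SROAPass(llvm::SROAOptions::PreserveCFG));
}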
  if (skipFunction(F))
    // ...
  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  // ...
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  // ...
  void getAnalysisUsage(AnalysisUsage &AU) const override {
  // ...
  StringRef getPassName() const override { return "SROA"; }
// ...
char SROALegacyPass::ID = 0;
// ...
                "Scalar Replacement Of Aggregates", false, false)
// File-local helpers referenced by the code above:

/// Find linked dbg.assign and generate a new one with the correct FragmentInfo.
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL);
/// Test whether the given alloca partitioning and range of slices can be promoted to a vector.
static VectorType *isVectorPromotionViable(Partition &P, const DataLayout &DL, unsigned VScale);
/// Compute the adjusted alignment for a load or store from an offset.
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset);
/// Test whether any vector type in CandidateTys is viable for promotion.
static VectorType *checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy, unsigned VScale);
/// Walk the range of a partitioning looking for a common type to cover this sequence of slices.
static std::pair<Type *, IntegerType *> findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset);
/// Strip aggregate type wrapping.
static Type *stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty);
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional<DIExpression::FragmentInfo> StorageFragment, std::optional<DIExpression::FragmentInfo> CurrentFragment, DIExpression::FragmentInfo &Target);
/// Create or replace an existing fragment in a DIExpression with Frag.
static DIExpression *createOrReplaceFragment(const DIExpression *Expr, DIExpression::FragmentInfo Frag, int64_t BitExtractOffset);
static Value *insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name);
/// Test whether the given slice use can be promoted to a vector.
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL, unsigned VScale);
/// Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
static Value *getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix);
/// Test whether a slice of an alloca is valid for integer widening.
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp);
static Value *extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name);
/// A helper that folds a PHI node or a select.
static Value *foldPHINodeOrSelectInst(Instruction &I);
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU);
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU);
static Value *foldSelectInst(SelectInst &SI);
bool isKillAddress(const DbgVariableRecord *DVR);
static Value *insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name);
/// Test whether the given alloca partition's integer operations can be widened to promotable ones.
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL);
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN);
static VectorType *createAndCheckVectorTypesForPromotion(SetVector<Type *> &OtherTys, ArrayRef<VectorType *> CandidateTysCopy, function_ref<void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl<VectorType *> &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale);
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR);
/// PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
static bool isSafePHIToSpeculate(PHINode &PN);
static Value *extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name);
/// Insert a new DbgRecord.
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional<DIExpression::FragmentInfo> NewFragment, int64_t BitExtractAdjustment);
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB);
/// This function takes two vector values and combines them into a single vector by concatenating their e...
static Value *mergeTwoVectors(Value *V0, Value *V1, const DataLayout &DL, Type *NewAIEltTy, IRBuilder<> &Builder);
const DIExpression *getAddressExpression(const DbgVariableRecord *DVR);
/// Try to find a partition of the aggregate type passed in for a given offset and size.
static Type *getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size);
/// Test whether we can convert a value from the old to the new type.
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, unsigned VScale = 0);
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG);
/// Generic routine to convert an SSA value to a value of a different type.
static Value *convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy);