#include "llvm/Config/llvm-config.h"

#define DEBUG_TYPE "sroa"

STATISTIC(NumAllocasAnalyzed, "Number of allocas analyzed for replacement");
STATISTIC(NumAllocaPartitions, "Number of alloca partitions formed");
STATISTIC(MaxPartitionsPerAlloca, "Maximum number of partitions per alloca");
STATISTIC(NumAllocaPartitionUses, "Number of alloca partition uses rewritten");
STATISTIC(MaxUsesPerAllocaPartition, "Maximum number of uses of a partition");
STATISTIC(NumNewAllocas, "Number of new, smaller allocas introduced");
STATISTIC(NumPromoted, "Number of allocas promoted to SSA values");
STATISTIC(NumLoadsSpeculated,
          "Number of loads speculated to allow promotion");
STATISTIC(NumLoadsPredicated,
          "Number of loads rewritten into predicated loads to allow promotion");
STATISTIC(NumStoresPredicated,
          "Number of stores rewritten into predicated loads to allow promotion");
STATISTIC(NumVectorized, "Number of vectorized aggregates");

class AllocaSliceRewriter;
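// Tracks, for a load of a select, whether each hand (the true and the false
// operand) can be speculated. Both flags are packed into a single byte so
// the whole object fits into the spare bits of a PointerIntPair (see the
// static_assert below).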
class SelectHandSpeculativity {
  unsigned char Storage = 0;

public:
  SelectHandSpeculativity() = default;
  SelectHandSpeculativity &setAsSpeculatable(bool isTrueVal);
  bool isSpeculatable(bool isTrueVal) const;
  bool areAllSpeculatable() const;
  bool areAnySpeculatable() const;
  bool areNoneSpeculatable() const;

  explicit operator intptr_t() const { return static_cast<intptr_t>(Storage); }
  explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
};
static_assert(sizeof(SelectHandSpeculativity) == sizeof(unsigned char));
using PossiblySpeculatableLoad =
    PointerIntPair<LoadInst *, 2, SelectHandSpeculativity>;
using UnspeculatableStore = StoreInst *;
using RewriteableMemOp =
    std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
using RewriteableMemOps = SmallVector<RewriteableMemOp, 2>;
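// State of the SROA pass itself: analysis handles, worklists of allocas still
// to be visited (and revisited after promotion), and the bookkeeping for PHIs
// and selects whose loads/stores must be speculated or predicated before the
// alloca can be promoted.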
  LLVMContext *const C;
  DomTreeUpdater *const DTU;
  AssumptionCache *const AC;
  const bool PreserveCFG;

  SmallSetVector<AllocaInst *, 16> Worklist;
  SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
  SetVector<AllocaInst *, SmallVector<AllocaInst *>,
            SmallPtrSet<AllocaInst *, 16>, 16>
      PromotableAllocas;
  SmallSetVector<PHINode *, 8> SpeculatablePHIs;
  SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;

  static std::optional<RewriteableMemOps>
  isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG);

public:
  SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
       SROAOptions PreserveCFG_)
      : C(C), DTU(DTU), AC(AC),
        PreserveCFG(PreserveCFG_ == SROAOptions::PreserveCFG) {}

  std::pair<bool, bool> runSROA(Function &F);

private:
  friend class AllocaSliceRewriter;

  bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
  std::pair<AllocaInst *, uint64_t>
  rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P);
  bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
  bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
  std::pair<bool, bool> runOnAlloca(AllocaInst &AI);
  void clobberUse(Use &U);
  bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
  bool promoteAllocas();
};
enum FragCalcResult { UseFrag, UseNoFrag, Skip };

static FragCalcResult
calculateFragment(DILocalVariable *Variable,
                  uint64_t NewStorageSliceOffsetInBits,
                  uint64_t NewStorageSliceSizeInBits,
                  std::optional<DIExpression::FragmentInfo> StorageFragment,
                  std::optional<DIExpression::FragmentInfo> CurrentFragment,
                  DIExpression::FragmentInfo &Target) {
  if (StorageFragment) {
    Target.SizeInBits =
        std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
    Target.OffsetInBits =
        NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
  } else {
    Target.SizeInBits = NewStorageSliceSizeInBits;
    Target.OffsetInBits = NewStorageSliceOffsetInBits;
  }

  if (!CurrentFragment) {
    if (auto Size = Variable->getSizeInBits()) {
      // Treat the current fragment as covering the whole variable.
      CurrentFragment = DIExpression::FragmentInfo(*Size, 0);
      if (Target == CurrentFragment)
        return UseNoFrag;
    }
  }

  if (!CurrentFragment || *CurrentFragment == Target)
    return UseFrag;

  if (Target.startInBits() < CurrentFragment->startInBits() ||
      Target.endInBits() > CurrentFragment->endInBits())
    return Skip;
  if (DVRAssignMarkerRange.empty())
    return;

  LLVM_DEBUG(dbgs() << " OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
                    << "\n");
  LLVM_DEBUG(dbgs() << " SliceSizeInBits: " << SliceSizeInBits << "\n");

        DVR->getExpression()->getFragmentInfo();

    auto *Expr = DbgAssign->getExpression();
    bool SetKillLocation = false;

    std::optional<DIExpression::FragmentInfo> BaseFragment;
    if (R == BaseFragments.end())
      return;
    BaseFragment = R->second;
    std::optional<DIExpression::FragmentInfo> CurrentFragment =
        Expr->getFragmentInfo();

        DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
        BaseFragment, CurrentFragment, NewFragment);

    if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
      if (CurrentFragment) {
        NewFragment.OffsetInBits -= CurrentFragment->OffsetInBits;
      }
      SetKillLocation = true;
    }

      Inst->setMetadata(LLVMContext::MD_DIAssignID, NewID);

                               DbgAssign->getDebugLoc())));
      NewAssign = DbgAssign;

        Value && (DbgAssign->hasArgList() ||
                  !DbgAssign->getExpression()->isSingleLocationExpression());

    if (NewAssign != DbgAssign) {
      NewAssign->moveBefore(DbgAssign->getIterator());
    }

    LLVM_DEBUG(dbgs() << "Created new assign: " << *NewAssign << "\n");
  };

  for_each(DVRAssignMarkerRange, MigrateDbgAssign);
  Twine getNameWithPrefix(const Twine &Name) const {

  void SetNamePrefix(const Twine &P) { Prefix = P.str(); }

  void InsertHelper(Instruction *I, const Twine &Name,
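/// A Slice records the byte range [BeginOffset, EndOffset) of the alloca
/// touched by one particular use, together with whether that use may be split
/// across partition boundaries. The Use pointer and the splittable bit share
/// a PointerIntPair; a null Use marks the slice as dead.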
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  PointerIntPair<Use *, 1, bool> UseAndIsSplittable;

  Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U, bool IsSplittable,
        Value *ProtectedFieldDisc)
      : BeginOffset(BeginOffset), EndOffset(EndOffset),
        UseAndIsSplittable(U, IsSplittable),
        ProtectedFieldDisc(ProtectedFieldDisc) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  bool isSplittable() const { return UseAndIsSplittable.getInt(); }
  void makeUnsplittable() { UseAndIsSplittable.setInt(false); }

  Use *getUse() const { return UseAndIsSplittable.getPointer(); }

  bool isDead() const { return getUse() == nullptr; }
  void kill() { UseAndIsSplittable.setPointer(nullptr); }

  Value *ProtectedFieldDisc;

  bool operator<(const Slice &RHS) const {
    if (beginOffset() < RHS.beginOffset())
      return true;
    if (beginOffset() > RHS.beginOffset())
      return false;
    if (isSplittable() != RHS.isSplittable())
      return !isSplittable();
    if (endOffset() > RHS.endOffset())
      return true;
    return false;
  }

  [[maybe_unused]] friend bool operator<(const Slice &LHS, uint64_t RHSOffset) {
    return LHS.beginOffset() < RHSOffset;
  }
  [[maybe_unused]] friend bool operator<(uint64_t LHSOffset, const Slice &RHS) {
    return LHSOffset < RHS.beginOffset();
  }

  bool operator==(const Slice &RHS) const {
    return isSplittable() == RHS.isSplittable() &&
           beginOffset() == RHS.beginOffset() && endOffset() == RHS.endOffset();
  }
  AllocaSlices(const DataLayout &DL, AllocaInst &AI);

  bool isEscaped() const { return PointerEscapingInstr; }
  bool isEscapedReadOnly() const { return PointerEscapingInstrReadOnly; }

  using range = iterator_range<iterator>;

  iterator begin() { return Slices.begin(); }
  iterator end() { return Slices.end(); }

  using const_range = iterator_range<const_iterator>;

  const_iterator begin() const { return Slices.begin(); }
  const_iterator end() const { return Slices.end(); }

  void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }

    int OldSize = Slices.size();
    Slices.append(NewSlices.begin(), NewSlices.end());
    auto SliceI = Slices.begin() + OldSize;
    std::stable_sort(SliceI, Slices.end());
    std::inplace_merge(Slices.begin(), SliceI, Slices.end());

    return DeadUseIfPromotable;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
  void print(raw_ostream &OS, const_iterator I, StringRef Indent = "  ") const;
  void printSlice(raw_ostream &OS, const_iterator I,
                  StringRef Indent = "  ") const;
  void printUse(raw_ostream &OS, const_iterator I,
                StringRef Indent = "  ") const;
  void print(raw_ostream &OS) const;
  void dump(const_iterator I) const;

  template <typename DerivedT, typename RetT = void> class BuilderBase;

  friend class AllocaSlices::SliceBuilder;

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

  SmallVector<Instruction *, 8> DeadUsers;
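/// A Partition is a contiguous byte range [BeginOffset, EndOffset) of the
/// alloca together with the iterator range [SI, SJ) of slices that begin
/// inside it. Partitions are formed lazily by partition_iterator below.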
  friend class AllocaSlices;
  friend class AllocaSlices::partition_iterator;

  using iterator = AllocaSlices::iterator;

  uint64_t BeginOffset = 0, EndOffset = 0;

  Partition(iterator SI) : SI(SI), SJ(SI) {}

  uint64_t beginOffset() const { return BeginOffset; }
  uint64_t endOffset() const { return EndOffset; }

  uint64_t size() const {
    assert(BeginOffset < EndOffset && "Partitions must span some bytes!");
    return EndOffset - BeginOffset;
  }

  bool empty() const { return SI == SJ; }

  iterator begin() const { return SI; }
  iterator end() const { return SJ; }
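  /// Advancing the iterator rebuilds the current Partition in place: it first
  /// retires split-tail slices that end at or before the new start offset,
  /// then either forms an unsplittable partition that swallows every
  /// overlapping slice, or grows a splittable partition until an unsplittable
  /// slice is reached, whose begin offset caps the partition's end.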
  AllocaSlices::iterator SE;

  uint64_t MaxSplitSliceEndOffset = 0;

  partition_iterator(AllocaSlices::iterator SI, AllocaSlices::iterator SE)

    assert((P.SI != SE || !P.SplitTails.empty()) &&
           "Cannot advance past the end of the slices!");

    if (!P.SplitTails.empty()) {
      if (P.EndOffset >= MaxSplitSliceEndOffset) {
        P.SplitTails.clear();
        MaxSplitSliceEndOffset = 0;

            [&](Slice *S) { return S->endOffset() <= P.EndOffset; });

                   return S->endOffset() == MaxSplitSliceEndOffset;
               "Could not find the current max split slice offset!");
                   return S->endOffset() <= MaxSplitSliceEndOffset;
               "Max split slice end offset is not actually the max!");

      assert(P.SplitTails.empty() && "Failed to clear the split slices!");

        if (S.isSplittable() && S.endOffset() > P.EndOffset) {
          P.SplitTails.push_back(&S);
          MaxSplitSliceEndOffset =
              std::max(S.endOffset(), MaxSplitSliceEndOffset);

        P.BeginOffset = P.EndOffset;
        P.EndOffset = MaxSplitSliceEndOffset;

    if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
        !P.SI->isSplittable()) {
      P.BeginOffset = P.EndOffset;
      P.EndOffset = P.SI->beginOffset();

    P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
    P.EndOffset = P.SI->endOffset();

    if (!P.SI->isSplittable()) {
      assert(P.BeginOffset == P.SI->beginOffset());

      while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
        if (!P.SJ->isSplittable())
          P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    assert(P.SI->isSplittable() && "Forming a splittable partition!");

    while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
           P.SJ->isSplittable()) {
      P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());

    if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
      assert(!P.SJ->isSplittable());
      P.EndOffset = P.SJ->beginOffset();

           "End iterators don't match between compared partition iterators!");

    if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
      assert(P.SJ == RHS.P.SJ &&
             "Same set of slices formed two different sized partitions!");
      assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
             "Same slice position with differently sized non-empty split "

    return make_range(partition_iterator(begin(), end()),
                      partition_iterator(end(), end()));
    return SI.getOperand(1 + CI->isZero());
  if (SI.getOperand(1) == SI.getOperand(2))
    return SI.getOperand(1);

  return PN->hasConstantValue();
  Value *ProtectedFieldDisc = nullptr;

    if (VisitedDeadInsts.insert(&I).second)

                 bool IsSplittable = false) {

                        << " which has zero size or starts outside of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      return markAsDead(I);

    uint64_t BeginOffset = Offset.getZExtValue();
    uint64_t EndOffset = BeginOffset + Size;

    assert(AllocSize >= BeginOffset);
    if (Size > AllocSize - BeginOffset) {
                        << Offset << " to remain within the " << AllocSize
                        << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << I << "\n");
      EndOffset = AllocSize;
    }

    AS.Slices.push_back(
        Slice(BeginOffset, EndOffset, U, IsSplittable, ProtectedFieldDisc));
  void visitBitCastInst(BitCastInst &BC) {
      return markAsDead(BC);
    return Base::visitBitCastInst(BC);
  }

  void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
      return markAsDead(ASC);
    return Base::visitAddrSpaceCastInst(ASC);
  }

  void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
      return markAsDead(GEPI);
    return Base::visitGetElementPtrInst(GEPI);
  }
  void handleLoadOrStore(Type *Ty, Instruction &I, const APInt &Offset,
                         uint64_t Size, bool IsVolatile) {

  void visitLoadInst(LoadInst &LI) {
           "All simple FCA loads should have been pre-split");

      return PI.setEscapedReadOnly(&LI);

    if (Size.isScalable()) {
        return PI.setAborted(&LI);
  void visitStoreInst(StoreInst &SI) {
    Value *ValOp = SI.getValueOperand();
      return PI.setEscapedAndAborted(&SI);
      return PI.setAborted(&SI);

    TypeSize StoreSize = DL.getTypeStoreSize(ValOp->getType());

    unsigned VScale = SI.getFunction()->getVScaleValue();
        return PI.setAborted(&SI);

                        << Offset << " which extends past the end of the "
                        << AllocSize << " byte alloca:\n"
                        << " alloca: " << AS.AI << "\n"
                        << " use: " << SI << "\n");
      return markAsDead(SI);

           "All simple FCA stores should have been pre-split");
  void visitMemSetInst(MemSetInst &II) {
    assert(II.getRawDest() == *U && "Pointer use is not the destination?");

        (IsOffsetKnown && Offset.uge(AllocSize)))
      return markAsDead(II);

      return PI.setAborted(&II);

                  : AllocSize - Offset.getLimitedValue(),
  void visitMemTransferInst(MemTransferInst &II) {
      return markAsDead(II);

    if (VisitedDeadInsts.count(&II))

      return PI.setAborted(&II);

    if (Offset.uge(AllocSize)) {
      SmallDenseMap<Instruction *, unsigned>::iterator MTPI =
          MemTransferSliceMap.find(&II);
      if (MTPI != MemTransferSliceMap.end())
        AS.Slices[MTPI->second].kill();
      return markAsDead(II);
    }

    uint64_t RawOffset = Offset.getLimitedValue();
    uint64_t Size = Length ? Length->getLimitedValue() : AllocSize - RawOffset;

    if (*U == II.getRawDest() && *U == II.getRawSource()) {
      if (!II.isVolatile())
        return markAsDead(II);

    SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
    std::tie(MTPI, Inserted) =
        MemTransferSliceMap.insert(std::make_pair(&II, AS.Slices.size()));
    unsigned PrevIdx = MTPI->second;

      Slice &PrevP = AS.Slices[PrevIdx];

      if (!II.isVolatile() && PrevP.beginOffset() == RawOffset) {
          return markAsDead(II);

      PrevP.makeUnsplittable();

    assert(AS.Slices[PrevIdx].getUse()->getUser() == &II &&
           "Map index doesn't point back to a slice with this user.");
  void visitIntrinsicInst(IntrinsicInst &II) {
    if (II.isDroppable()) {
      AS.DeadUseIfPromotable.push_back(U);

      return PI.setAborted(&II);

    if (II.isLifetimeStartOrEnd()) {
      insertUse(II, Offset, AllocSize, true);

    if (II.getIntrinsicID() == Intrinsic::protected_field_ptr) {
      AS.PFPUsers.push_back(&II);
      ProtectedFieldDisc = II.getArgOperand(1);
      for (Use &U : II.uses()) {
          visitStoreInst(*SI);
      ProtectedFieldDisc = nullptr;

    Base::visitIntrinsicInst(II);
  Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &Size) {
    SmallPtrSet<Instruction *, 4> Visited;

      std::tie(UsedI, I) = Uses.pop_back_val();

        TypeSize LoadSize = DL.getTypeStoreSize(LI->getType());

        TypeSize StoreSize = DL.getTypeStoreSize(Op->getType());

        if (!GEP->hasAllZeroIndices())

      for (User *U : I->users())
    } while (!Uses.empty());
  void visitPHINodeOrSelectInst(Instruction &I) {
      return markAsDead(I);

      return PI.setAborted(&I);

      AS.DeadOperands.push_back(U);

      return PI.setAborted(&I);

    uint64_t &Size = PHIOrSelectSizes[&I];
      if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&I, Size))
        return PI.setAborted(UnsafeI);

    if (Offset.uge(AllocSize)) {
      AS.DeadOperands.push_back(U);

  void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }

  void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }

  void visitInstruction(Instruction &I) { PI.setAborted(&I); }

  void visitCallBase(CallBase &CB) {
      PI.setEscapedReadOnly(&CB);
    Base::visitCallBase(CB);
AllocaSlices::AllocaSlices(const DataLayout &DL, AllocaInst &AI)
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
      PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
  SliceBuilder::PtrInfo PtrI = PB.visitPtr(AI);
  if (PtrI.isEscaped() || PtrI.isAborted()) {
    PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
                                                  : PtrI.getAbortingInst();
    assert(PointerEscapingInstr && "Did not track a bad instruction");

  PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();

  llvm::erase_if(Slices, [](const Slice &S) { return S.isDead(); });
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

void AllocaSlices::print(raw_ostream &OS, const_iterator I,
                         StringRef Indent) const {
  printSlice(OS, I, Indent);
  printUse(OS, I, Indent);
}

void AllocaSlices::printSlice(raw_ostream &OS, const_iterator I,
                              StringRef Indent) const {
  OS << Indent << "[" << I->beginOffset() << "," << I->endOffset() << ")"
     << " slice #" << (I - begin())
     << (I->isSplittable() ? " (splittable)" : "");
}

void AllocaSlices::printUse(raw_ostream &OS, const_iterator I,
                            StringRef Indent) const {
  OS << Indent << " used by: " << *I->getUse()->getUser() << "\n";
}

void AllocaSlices::print(raw_ostream &OS) const {
  if (PointerEscapingInstr) {
    OS << "Can't analyze slices for alloca: " << AI << "\n"
       << " A pointer to this alloca escaped by:\n"
       << " " << *PointerEscapingInstr << "\n";

  if (PointerEscapingInstrReadOnly)
    OS << "Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly << "\n";

  OS << "Slices of alloca: " << AI << "\n";
static std::pair<Type *, IntegerType *>

  bool TyIsCommon = true;

  for (AllocaSlices::const_iterator I = B; I != E; ++I) {
    Use *U = I->getUse();

    if (I->beginOffset() != B->beginOffset() || I->endOffset() != EndOffset)

    Type *UserTy = nullptr;

      UserTy = SI->getValueOperand()->getType();

      if (UserITy->getBitWidth() % 8 != 0 ||
          UserITy->getBitWidth() / 8 > (EndOffset - B->beginOffset()))

      if (!ITy || ITy->getBitWidth() < UserITy->getBitWidth())

    if (!UserTy || (Ty && Ty != UserTy))

  return {TyIsCommon ? Ty : nullptr, ITy};
  Type *LoadType = nullptr;

      if (LoadType != LI->getType())

    if (BBI->mayWriteToMemory())

    MaxAlign = std::max(MaxAlign, LI->getAlign());

      APInt(APWidth, DL.getTypeStoreSize(LoadType).getFixedValue());
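// For illustration, speculating a load over a PHI of pointers turns
//   %p = phi ptr [ %a, %bb1 ], [ %b, %bb2 ]
//   %v = load i32, ptr %p
// into a load of %a at the end of %bb1, a load of %b at the end of %bb2, and
// a PHI over the two loaded values (the names above are only an example).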
  IRB.SetInsertPoint(&PN);
                              PN.getName() + ".sroa.speculated");

    IRB.SetInsertPoint(TI);

    LoadInst *Load = IRB.CreateAlignedLoad(
        LoadTy, InVal, Alignment,
        (PN.getName() + ".sroa.speculate.load." + Pred->getName()));
    ++NumLoadsSpeculated;
      Load->setAAMetadata(AATags);
    InjectedLoads[Pred] = Load;
SelectHandSpeculativity &
SelectHandSpeculativity::setAsSpeculatable(bool isTrueVal) {

bool SelectHandSpeculativity::isSpeculatable(bool isTrueVal) const {

bool SelectHandSpeculativity::areAllSpeculatable() const {
  return isSpeculatable(true) &&
         isSpeculatable(false);
}

bool SelectHandSpeculativity::areAnySpeculatable() const {
  return isSpeculatable(true) ||
         isSpeculatable(false);
}

bool SelectHandSpeculativity::areNoneSpeculatable() const {
  return !areAnySpeculatable();
}

static SelectHandSpeculativity
  SelectHandSpeculativity Spec;

    Spec.setAsSpeculatable(Value == SI.getTrueValue());
std::optional<RewriteableMemOps>
SROA::isSafeSelectToSpeculate(SelectInst &SI, bool PreserveCFG) {
  RewriteableMemOps Ops;

  for (User *U : SI.users()) {
      Ops.emplace_back(Store);

    PossiblySpeculatableLoad Load(LI);
      Ops.emplace_back(Load);

    SelectHandSpeculativity Spec =
    Ops.emplace_back(Load);
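// For illustration, speculating a load over a select such as
//   %p = select i1 %c, ptr %a, ptr %b
//   %v = load i32, ptr %p
// produces unconditional loads of %a and %b followed by
//   %v = select i1 %c, i32 %v.true, i32 %v.false
// (the value names are only an example).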
  Value *TV = SI.getTrueValue();
  Value *FV = SI.getFalseValue();

    IRB.SetInsertPoint(&LI);

                              LI.getName() + ".sroa.speculate.load.true");
                              LI.getName() + ".sroa.speculate.load.false");
    NumLoadsSpeculated += 2;

    Value *V = IRB.CreateSelect(SI.getCondition(), TL, FL,
                                LI.getName() + ".sroa.speculated",
template <typename T>
                               SelectHandSpeculativity Spec,

  if (Spec.areNoneSpeculatable())
        SI.getMetadata(LLVMContext::MD_prof), &DTU);
        SI.getMetadata(LLVMContext::MD_prof), &DTU,
    if (Spec.isSpeculatable(true))

    bool IsThen = SuccBB == HeadBI->getSuccessor(0);
    int SuccIdx = IsThen ? 0 : 1;
    auto *NewMemOpBB = SuccBB == Tail ? Head : SuccBB;
    auto &CondMemOp = cast<T>(*I.clone());
    if (NewMemOpBB != Head) {
      NewMemOpBB->setName(Head->getName() + (IsThen ? ".then" : ".else"));
        ++NumLoadsPredicated;
        ++NumStoresPredicated;
      CondMemOp.dropUBImplyingAttrsAndMetadata();
      ++NumLoadsSpeculated;

    CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
    Value *Ptr = SI.getOperand(1 + SuccIdx);
    CondMemOp.setOperand(I.getPointerOperandIndex(), Ptr);
      CondMemOp.setName(I.getName() + (IsThen ? ".then" : ".else") + ".val");

  I.replaceAllUsesWith(PN);
                                 SelectHandSpeculativity Spec,

                                       const RewriteableMemOps &Ops,
  bool CFGChanged = false;

  for (const RewriteableMemOp &Op : Ops) {
    SelectHandSpeculativity Spec;

    if (auto *const *US = std::get_if<UnspeculatableStore>(&Op)) {
      auto PSL = std::get<PossiblySpeculatableLoad>(Op);
      I = PSL.getPointer();
      Spec = PSL.getInt();

    if (Spec.areAllSpeculatable()) {
      assert(DTU && "Should not get here when not allowed to modify the CFG!");

    I->eraseFromParent();

  SI.eraseFromParent();
                             const Twine &NamePrefix) {
    Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(Offset),
                                   NamePrefix + "sroa_idx");
  return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr, PointerTy,
                                                 NamePrefix + "sroa_cast");
                            unsigned VScale = 0) {
         "We can't have the same bitwidth for different int types");

  TypeSize NewSize = DL.getTypeSizeInBits(NewTy);
  TypeSize OldSize = DL.getTypeSizeInBits(OldTy);

  if (NewSize != OldSize)

    return OldAS == NewAS ||
           (!DL.isNonIntegralAddressSpace(OldAS) &&
            !DL.isNonIntegralAddressSpace(NewAS) &&
            DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));

    return !DL.isNonIntegralPointerType(NewTy);

  if (!DL.isNonIntegralPointerType(OldTy))
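// Converts a value between two types of the same store size without changing
// its bits: integers and vectors go through bitcasts, pointer<->integer
// conversions go through the DataLayout's intptr type, and fixed<->scalable
// vector mismatches are bridged with insert/extract-vector (the
// CreateBitCastLike helper below).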
  Type *OldTy = V->getType();
         "Value not convertable to type");

           "Integer types must be the exact same to convert.");

  auto CreateBitCastLike = [&IRB](Value *In, Type *Ty) -> Value * {
    Type *InTy = In->getType();

      return IRB.CreateBitCast(IRB.CreateInsertVector(VTy,

      return IRB.CreateExtractVector(Ty, IRB.CreateBitCast(In, VTy),

    return IRB.CreateBitCast(In, Ty);

      return IRB.CreateIntToPtr(CreateBitCastLike(V, DL.getIntPtrType(NewTy)),

      return CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),

    if (OldAS != NewAS) {
      assert(DL.getPointerSize(OldAS) == DL.getPointerSize(NewAS));
      return IRB.CreateIntToPtr(
          CreateBitCastLike(IRB.CreatePtrToInt(V, DL.getIntPtrType(OldTy)),
                            DL.getIntPtrType(NewTy)),

  return CreateBitCastLike(V, NewTy);
      std::max(S.beginOffset(), P.beginOffset()) - P.beginOffset();
  uint64_t BeginIndex = BeginOffset / ElementSize;
  if (BeginIndex * ElementSize != BeginOffset ||

  uint64_t EndOffset = std::min(S.endOffset(), P.endOffset()) - P.beginOffset();
  uint64_t EndIndex = EndOffset / ElementSize;
  if (EndIndex * ElementSize != EndOffset ||

  assert(EndIndex > BeginIndex && "Empty vector!");
  uint64_t NumElements = EndIndex - BeginIndex;
  Type *SliceTy = (NumElements == 1)
                      ? Ty->getElementType()

  Use *U = S.getUse();

    if (MI->isVolatile())
    if (!S.isSplittable())

    if (!II->isLifetimeStartOrEnd() && !II->isDroppable())

    if (LTy->isStructTy())
    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
      assert(LTy->isIntegerTy());

    if (SI->isVolatile())
    Type *STy = SI->getValueOperand()->getType();

    if (P.beginOffset() > S.beginOffset() || P.endOffset() < S.endOffset()) {
    bool HaveCommonEltTy, Type *CommonEltTy,
    bool HaveVecPtrTy, bool HaveCommonVecPtrTy,
    VectorType *CommonVecPtrTy, unsigned VScale) {
  if (CandidateTys.empty())

  if (HaveVecPtrTy && !HaveCommonVecPtrTy)

  if (!HaveCommonEltTy && HaveVecPtrTy) {
    CandidateTys.clear();
  } else if (!HaveCommonEltTy && !HaveVecPtrTy) {
      if (!VTy->getElementType()->isIntegerTy())
          VTy->getContext(), VTy->getScalarSizeInBits())));

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

      assert(DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
                 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
             "Cannot have vector types of different sizes!");
      assert(RHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");
      assert(LHSTy->getElementType()->isIntegerTy() &&
             "All non-integer types eliminated!");

    llvm::sort(CandidateTys, RankVectorTypesComp);
    CandidateTys.erase(llvm::unique(CandidateTys, RankVectorTypesEq),
                       CandidateTys.end());

      assert(VTy->getElementType() == CommonEltTy &&
             "Unaccounted for element type!");
      assert(VTy == CandidateTys[0] &&
             "Different vector types with the same element type!");
    CandidateTys.resize(1);

      std::numeric_limits<unsigned short>::max();

        DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();

    if (ElementSize % 8)
    assert((DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
           "vector size not a multiple of element size?");

    for (const Slice &S : P)

    for (const Slice *S : P.splitSliceTails())

  return VTy != CandidateTys.end() ? *VTy : nullptr;
    bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy,
    bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale) {
      CandidateTysCopy.size() ? CandidateTysCopy[0] : nullptr;

  for (Type *Ty : OtherTys) {
    unsigned TypeSize = DL.getTypeSizeInBits(Ty).getFixedValue();

    for (VectorType *const VTy : CandidateTysCopy) {
      assert(CandidateTysCopy[0] == OriginalElt && "Different Element");
      unsigned VectorSize = DL.getTypeSizeInBits(VTy).getFixedValue();
      unsigned ElementSize =
          DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
        CheckCandidateType(NewVTy);

      P, DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
      HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
  Type *CommonEltTy = nullptr;
  bool HaveVecPtrTy = false;
  bool HaveCommonEltTy = true;
  bool HaveCommonVecPtrTy = true;
  auto CheckCandidateType = [&](Type *Ty) {
      if (!CandidateTys.empty()) {
        if (DL.getTypeSizeInBits(VTy).getFixedValue() !=
            DL.getTypeSizeInBits(V).getFixedValue()) {
          CandidateTys.clear();

      Type *EltTy = VTy->getElementType();
        CommonEltTy = EltTy;
      else if (CommonEltTy != EltTy)
        HaveCommonEltTy = false;

        HaveVecPtrTy = true;
        if (!CommonVecPtrTy)
          CommonVecPtrTy = VTy;
        else if (CommonVecPtrTy != VTy)
          HaveCommonVecPtrTy = false;

  for (const Slice &S : P) {
      Ty = SI->getValueOperand()->getType();

    auto CandTy = Ty->getScalarType();
    if (CandTy->isPointerTy() && (S.beginOffset() != P.beginOffset() ||
                                  S.endOffset() != P.endOffset())) {

    if (S.beginOffset() == P.beginOffset() && S.endOffset() == P.endOffset())
      CheckCandidateType(Ty);

          LoadStoreTys, CandidateTysCopy, CheckCandidateType, P, DL,
          CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
          HaveCommonVecPtrTy, CommonVecPtrTy, VScale))

  CandidateTys.clear();
      DeferredTys, CandidateTysCopy, CheckCandidateType, P, DL, CandidateTys,
      HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
      CommonVecPtrTy, VScale);
                                            bool &WholeAllocaOp) {
  uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
  uint64_t RelEnd = S.endOffset() - AllocBeginOffset;

  Use *U = S.getUse();

    if (II->isLifetimeStartOrEnd() || II->isDroppable())

    if (S.beginOffset() < AllocBeginOffset)

      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    Type *ValueTy = SI->getValueOperand()->getType();
    if (SI->isVolatile())

    TypeSize StoreSize = DL.getTypeStoreSize(ValueTy);

    if (S.beginOffset() < AllocBeginOffset)

      WholeAllocaOp = true;
      if (ITy->getBitWidth() < DL.getTypeStoreSizeInBits(ITy).getFixedValue())
    } else if (RelBegin != 0 || RelEnd != Size ||

    if (!S.isSplittable())

  uint64_t SizeInBits = DL.getTypeSizeInBits(AllocaTy).getFixedValue();

  if (SizeInBits != DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())

  bool WholeAllocaOp = P.empty() && DL.isLegalInteger(SizeInBits);

  for (const Slice &S : P)

  for (const Slice *S : P.splitSliceTails())

  return WholeAllocaOp;
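// Integer widening accesses the partition as one alloca-wide integer: a slice
// is read by shifting the wide value right and truncating (extractInteger),
// and written by zero-extending, shifting left, masking the old value, and
// or-ing in the new bits (insertInteger). On big-endian targets the shift
// amount is measured from the other end of the value.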
                             const Twine &Name) {
             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element extends past full value");

  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateLShr(V, ShAmt, Name + ".shift");

  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot extract to a larger integer!");
    V = IRB.CreateTrunc(V, Ty, Name + ".trunc");

  assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
         "Cannot insert a larger integer!");
    V = IRB.CreateZExt(V, IntTy, Name + ".ext");

             DL.getTypeStoreSize(IntTy).getFixedValue() &&
         "Element store outside of alloca store");

  if (DL.isBigEndian())
    ShAmt = 8 * (DL.getTypeStoreSize(IntTy).getFixedValue() -
                 DL.getTypeStoreSize(Ty).getFixedValue() - Offset);
    V = IRB.CreateShl(V, ShAmt, Name + ".shift");

  if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
    APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
    Old = IRB.CreateAnd(Old, Mask, Name + ".mask");
  V = IRB.CreateOr(Old, V, Name + ".insert");
                            unsigned EndIndex, const Twine &Name) {
  unsigned NumElements = EndIndex - BeginIndex;

  if (NumElements == VecTy->getNumElements())

  if (NumElements == 1) {
    V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),

  V = IRB.CreateShuffleVector(V, Mask, Name + ".extract");

                           unsigned BeginIndex, const Twine &Name) {
  assert(VecTy && "Can only insert a vector into a vector");

    V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),

  assert(NumSubElements <= NumElements && "Too many elements!");
  if (NumSubElements == NumElements) {
    assert(V->getType() == VecTy && "Vector type mismatch");

  unsigned EndIndex = BeginIndex + NumSubElements;

  Mask.reserve(NumElements);
  for (unsigned Idx = 0; Idx != NumElements; ++Idx)
    if (Idx >= BeginIndex && Idx < EndIndex)
      Mask.push_back(Idx - BeginIndex);
  V = IRB.CreateShuffleVector(V, Mask, Name + ".expand");

  for (unsigned Idx = 0; Idx != NumElements; ++Idx)
    if (Idx >= BeginIndex && Idx < EndIndex)
      Mask.push_back(Idx);
      Mask.push_back(Idx + NumElements);
  V = IRB.CreateShuffleVector(V, Old, Mask, Name + "blend");
                              const char *DebugName) {
    Type *EltType = VecType->getElementType();
    if (EltType != NewAIEltTy) {
      unsigned TotalBits =
          VecType->getNumElements() * DL.getTypeSizeInBits(EltType);
      unsigned NewNumElts = TotalBits / DL.getTypeSizeInBits(NewAIEltTy);

      V = Builder.CreateBitCast(V, NewVecType);
      VecType = NewVecType;
      LLVM_DEBUG(dbgs() << " bitcast " << DebugName << ": " << *V << "\n");

  BitcastIfNeeded(V0, VecType0, "V0");
  BitcastIfNeeded(V1, VecType1, "V1");

  unsigned NumElts0 = VecType0->getNumElements();
  unsigned NumElts1 = VecType1->getNumElements();

  if (NumElts0 == NumElts1) {
    for (unsigned i = 0; i < NumElts0 + NumElts1; ++i)
      ShuffleMask.push_back(i);

    unsigned SmallSize = std::min(NumElts0, NumElts1);
    unsigned LargeSize = std::max(NumElts0, NumElts1);
    bool IsV0Smaller = NumElts0 < NumElts1;
    Value *&ExtendedVec = IsV0Smaller ? V0 : V1;

    for (unsigned i = 0; i < SmallSize; ++i)
    for (unsigned i = SmallSize; i < LargeSize; ++i)
    ExtendedVec = Builder.CreateShuffleVector(

    LLVM_DEBUG(dbgs() << " shufflevector: " << *ExtendedVec << "\n");
    for (unsigned i = 0; i < NumElts0; ++i)
      ShuffleMask.push_back(i);
    for (unsigned i = 0; i < NumElts1; ++i)
      ShuffleMask.push_back(LargeSize + i);

  return Builder.CreateShuffleVector(V0, V1, ShuffleMask);
class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
  friend class InstVisitor<AllocaSliceRewriter, bool>;

  using Base = InstVisitor<AllocaSliceRewriter, bool>;

  const DataLayout &DL;

  AllocaInst &OldAI, &NewAI;
  const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;

  uint64_t ElementSize;

  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  uint64_t NewBeginOffset = 0, NewEndOffset = 0;

  uint64_t SliceSize = 0;
  bool IsSplittable = false;
  bool IsSplit = false;
  Use *OldUse = nullptr;

  SmallSetVector<PHINode *, 8> &PHIUsers;
  SmallSetVector<SelectInst *, 8> &SelectUsers;

  Value *getPtrToNewAI(unsigned AddrSpace, bool IsVolatile) {
    Type *AccessTy = IRB.getPtrTy(AddrSpace);
    return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);

  AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &AS, SROA &Pass,
                      AllocaInst &OldAI, AllocaInst &NewAI, Type *NewAllocaTy,
                      uint64_t NewAllocaBeginOffset,
                      uint64_t NewAllocaEndOffset, bool IsIntegerPromotable,
                      VectorType *PromotableVecTy,
                      SmallSetVector<PHINode *, 8> &PHIUsers,
                      SmallSetVector<SelectInst *, 8> &SelectUsers)
      : DL(DL), AS(AS), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
        NewAllocaBeginOffset(NewAllocaBeginOffset),
        NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAllocaTy),
        IntTy(IsIntegerPromotable
                      DL.getTypeSizeInBits(NewAllocaTy).getFixedValue())
        VecTy(PromotableVecTy),
        ElementTy(VecTy ? VecTy->getElementType() : nullptr),
        ElementSize(VecTy ? DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
        PHIUsers(PHIUsers), SelectUsers(SelectUsers),
      assert((DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
             "Only multiple-of-8 sized vector elements are viable");
    assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));

  bool visit(AllocaSlices::const_iterator I) {
    bool CanSROA = true;
    BeginOffset = I->beginOffset();
    EndOffset = I->endOffset();
    IsSplittable = I->isSplittable();
        BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
    LLVM_DEBUG(dbgs() << " rewriting " << (IsSplit ? "split " : ""));

    assert(BeginOffset < NewAllocaEndOffset);
    assert(EndOffset > NewAllocaBeginOffset);
    NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
    NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);

    SliceSize = NewEndOffset - NewBeginOffset;
    LLVM_DEBUG(dbgs() << " Begin:(" << BeginOffset << ", " << EndOffset
                      << ") NewBegin:(" << NewBeginOffset << ", "
                      << NewEndOffset << ") NewAllocaBegin:("
                      << NewAllocaBeginOffset << ", " << NewAllocaEndOffset
    assert(IsSplit || NewBeginOffset == BeginOffset);
    OldUse = I->getUse();

    IRB.SetInsertPoint(OldUserI);
    IRB.SetCurrentDebugLocation(OldUserI->getDebugLoc());
    IRB.getInserter().SetNamePrefix(Twine(NewAI.getName()) + "." +
                                    Twine(BeginOffset) + ".");
  std::optional<SmallVector<Value *, 4>>
  rewriteTreeStructuredMerge(Partition &P) {
    if (P.splitSliceTails().size() > 0)
      return std::nullopt;

    SmallVector<Value *, 4> DeletedValues;
    LoadInst *TheLoad = nullptr;

      uint64_t BeginOffset;
      StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End, Value *Val)
          : Store(SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}

    Type *AllocatedEltTy =
    unsigned AllocatedEltTySize = DL.getTypeSizeInBits(AllocatedEltTy);

    auto IsTypeValidForTreeStructuredMerge = [&](Type *Ty) -> bool {
      return FixedVecTy &&
             DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
             !FixedVecTy->getElementType()->isPointerTy();

    for (Slice &S : P) {
        if (TheLoad || !IsTypeValidForTreeStructuredMerge(LI->getType()) ||
            S.beginOffset() != NewAllocaBeginOffset ||
            S.endOffset() != NewAllocaEndOffset || LI->isVolatile())
          return std::nullopt;

        if (!IsTypeValidForTreeStructuredMerge(
                SI->getValueOperand()->getType()) ||
          return std::nullopt;
        unsigned NumElts = VecTy->getNumElements();
        unsigned EltSize = DL.getTypeSizeInBits(VecTy->getElementType());
        if (NumElts * EltSize % AllocatedEltTySize != 0)
          return std::nullopt;
        StoreInfos.emplace_back(SI, S.beginOffset(), S.endOffset(),
                                SI->getValueOperand());
        return std::nullopt;
      return std::nullopt;

    if (StoreInfos.size() < 2)
      return std::nullopt;

    llvm::sort(StoreInfos, [](const StoreInfo &A, const StoreInfo &B) {
      return A.BeginOffset < B.BeginOffset;
    });

    uint64_t ExpectedStart = NewAllocaBeginOffset;
    for (auto &StoreInfo : StoreInfos) {
      uint64_t BeginOff = StoreInfo.BeginOffset;
      uint64_t EndOff = StoreInfo.EndOffset;
      if (BeginOff != ExpectedStart)
        return std::nullopt;
      ExpectedStart = EndOff;
    }
    if (ExpectedStart != NewAllocaEndOffset)
      return std::nullopt;

    BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
    for (auto &StoreInfo : StoreInfos) {
      if (StoreInfo.Store->getParent() != StoreBB)
        return std::nullopt;
      if (LoadBB == StoreBB && !StoreInfo.Store->comesBefore(TheLoad))
        return std::nullopt;
    }

    LLVM_DEBUG({
      dbgs() << "Tree structured merge rewrite:\n Load: " << *TheLoad
             << "\n Ordered stores:\n";
      for (auto [i, Info] : enumerate(StoreInfos))
        dbgs() << " [" << i << "] Range[" << Info.BeginOffset << ", "
               << Info.EndOffset << ") \tStore: " << *Info.Store
               << "\tValue: " << *Info.StoredValue << "\n";
    });

    std::queue<Value *> VecElements;
    for (const auto &Info : StoreInfos) {
      VecElements.push(Info.StoredValue);
    }

    while (VecElements.size() > 1) {
      const auto NumElts = VecElements.size();
      for ([[maybe_unused]] const auto _ : llvm::seq(NumElts / 2)) {
        Value *V0 = VecElements.front();
        Value *V1 = VecElements.front();
        VecElements.push(Merged);
      }
      if (NumElts % 2 == 1) {
        Value *V = VecElements.front();
        VecElements.push(V);
      }
    }

    Value *MergedValue = VecElements.front();
    Builder.CreateAlignedStore(MergedValue, &NewAI, getSliceAlign());
        TheLoad->getName() + ".sroa.new.load"));

    return DeletedValues;
  bool visitInstruction(Instruction &I) {

    assert(IsSplit || BeginOffset == NewBeginOffset);
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

    StringRef OldName = OldPtr->getName();
    size_t LastSROAPrefix = OldName.rfind(".sroa.");
      OldName = OldName.substr(LastSROAPrefix + strlen(".sroa."));
        OldName = OldName.substr(IndexEnd + 1);
          OldName = OldName.substr(OffsetEnd + 1);
    OldName = OldName.substr(0, OldName.find(".sroa_"));

  Align getSliceAlign() {
                             NewBeginOffset - NewAllocaBeginOffset);

  unsigned getIndex(uint64_t Offset) {
    assert(VecTy && "Can only call getIndex when rewriting a vector");
    uint64_t RelOffset = Offset - NewAllocaBeginOffset;
    assert(RelOffset / ElementSize < UINT32_MAX && "Index out of bounds");
    uint32_t Index = RelOffset / ElementSize;
    assert(Index * ElementSize == RelOffset);

  void deleteIfTriviallyDead(Value *V) {
      Pass.DeadInsts.push_back(I);
  Value *rewriteVectorizedLoadInst(LoadInst &LI) {
    unsigned BeginIndex = getIndex(NewBeginOffset);
    unsigned EndIndex = getIndex(NewEndOffset);
    assert(EndIndex > BeginIndex && "Empty vector!");

        IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
    Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                            LLVMContext::MD_access_group});
    return extractVector(IRB, Load, BeginIndex, EndIndex, "vec");

  Value *rewriteIntegerLoad(LoadInst &LI) {
    assert(IntTy && "We cannot insert an integer to the alloca");
        IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
    assert(NewBeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
    uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
    if (Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
      IntegerType *ExtractTy = Type::getIntNTy(LI.getContext(), SliceSize * 8);
             "Can only handle an extract for an overly wide load");
      V = IRB.CreateZExt(V, LI.getType());

  bool visitLoadInst(LoadInst &LI) {
    Type *TargetTy = IsSplit ? Type::getIntNTy(LI.getContext(), SliceSize * 8)
    bool IsPtrAdjusted = false;
      V = rewriteVectorizedLoadInst(LI);
      V = rewriteIntegerLoad(LI);
    } else if (NewBeginOffset == NewAllocaBeginOffset &&
               NewEndOffset == NewAllocaEndOffset &&
                DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
          getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
      LoadInst *NewLI = IRB.CreateAlignedLoad(
          NewAllocaTy, NewPtr, NewAI.getAlign(), LI.isVolatile(), LI.getName());
      if (LI.isVolatile())
        NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
      if (NewLI->isAtomic())
        NewLI->setAlignment(LI.getAlign());

      copyMetadataForLoad(*NewLI, LI);

        NewLI->setAAMetadata(AATags.adjustForAccess(
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));

      if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
        if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
          if (AITy->getBitWidth() < TITy->getBitWidth()) {
            V = IRB.CreateZExt(V, TITy, "load.ext");
            if (DL.isBigEndian())
              V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),

      Type *LTy = IRB.getPtrTy(AS);
          IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
            NewBeginOffset - BeginOffset, NewLI->getType(), DL));

      NewLI->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
                               LLVMContext::MD_access_group});

      IsPtrAdjusted = true;

             "Only integer type loads and stores are split");
      assert(SliceSize < DL.getTypeStoreSize(LI.getType()).getFixedValue() &&
             "Split load isn't smaller than original load");
             "Non-byte-multiple bit width");

      LIIt.setHeadBit(true);
      IRB.SetInsertPoint(LI.getParent(), LIIt);

      Value *Placeholder =
      Placeholder->replaceAllUsesWith(&LI);
      Placeholder->deleteValue();

    Pass.DeadInsts.push_back(&LI);
    deleteIfTriviallyDead(OldOp);
  bool rewriteVectorizedStoreInst(Value *V, StoreInst &SI, Value *OldOp,
    if (V->getType() != VecTy) {
      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");
      Type *SliceTy = (NumElements == 1)
                          : FixedVectorType::get(ElementTy, NumElements);
      if (V->getType() != SliceTy)

          IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");

    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

    Pass.DeadInsts.push_back(&SI);

                    Store, Store->getPointerOperand(), OrigV, DL);

  bool rewriteIntegerStore(Value *V, StoreInst &SI, AAMDNodes AATags) {
    assert(IntTy && "We cannot extract an integer from the alloca");
    if (DL.getTypeSizeInBits(V->getType()).getFixedValue() !=
      Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
      assert(BeginOffset >= NewAllocaBeginOffset && "Out of bounds offset");
      uint64_t Offset = BeginOffset - NewAllocaBeginOffset;

    StoreInst *Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.getAlign());
    Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

                    Store, Store->getPointerOperand(),
                    Store->getValueOperand(), DL);

    Pass.DeadInsts.push_back(&SI);
  bool visitStoreInst(StoreInst &SI) {
    Value *OldOp = SI.getOperand(1);

    AAMDNodes AATags = SI.getAAMetadata();

    if (V->getType()->isPointerTy())
        Pass.PostPromotionWorklist.insert(AI);

    TypeSize StoreSize = DL.getTypeStoreSize(V->getType());
      assert(V->getType()->isIntegerTy() &&
             "Only integer type loads and stores are split");
      assert(DL.typeSizeEqualsStoreSize(V->getType()) &&
             "Non-byte-multiple bit width");
      IntegerType *NarrowTy = Type::getIntNTy(SI.getContext(), SliceSize * 8);

      return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
    if (IntTy && V->getType()->isIntegerTy())
      return rewriteIntegerStore(V, SI, AATags);

    if (NewBeginOffset == NewAllocaBeginOffset &&
        NewEndOffset == NewAllocaEndOffset &&
          getPtrToNewAI(SI.getPointerAddressSpace(), SI.isVolatile());
          IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), SI.isVolatile());
      unsigned AS = SI.getPointerAddressSpace();
      Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
          IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(), SI.isVolatile());

    NewSI->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});

    if (SI.isVolatile())

    Pass.DeadInsts.push_back(&SI);
    deleteIfTriviallyDead(OldOp);

    assert(Size > 0 && "Expected a positive number of bytes.");

          IRB.CreateZExt(V, SplatIntTy, "zext"),

      V = IRB.CreateVectorSplat(NumElements, V, "vsplat");
  bool visitMemSetInst(MemSetInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

      assert(NewBeginOffset == BeginOffset);
      II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->getType()));
      II.setDestAlignment(getSliceAlign());

             "AT: Unexpected link to non-const GEP");
      deleteIfTriviallyDead(OldPtr);

    Pass.DeadInsts.push_back(&II);

    const bool CanContinue = [&]() {
      if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
      const uint64_t Len = C->getLimitedValue();
      if (Len > std::numeric_limits<unsigned>::max())
      auto *Int8Ty = IntegerType::getInt8Ty(NewAI.getContext());
             DL.isLegalInteger(DL.getTypeSizeInBits(ScalarTy).getFixedValue());

      Type *SizeTy = II.getLength()->getType();
      unsigned Sz = NewEndOffset - NewBeginOffset;
          getNewAllocaSlicePtr(IRB, OldPtr->getType()), II.getValue(), Size,
          MaybeAlign(getSliceAlign()), II.isVolatile()));

                        New, New->getRawDest(), nullptr, DL);

      assert(ElementTy == ScalarTy);

      unsigned BeginIndex = getIndex(NewBeginOffset);
      unsigned EndIndex = getIndex(NewEndOffset);
      assert(EndIndex > BeginIndex && "Empty vector!");
      unsigned NumElements = EndIndex - BeginIndex;
             "Too many elements!");

          II.getValue(), DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
      if (NumElements > 1)

      Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),

      uint64_t Size = NewEndOffset - NewBeginOffset;
      V = getIntegerSplat(II.getValue(), Size);

      if (IntTy && (BeginOffset != NewAllocaBeginOffset ||
                    EndOffset != NewAllocaBeginOffset)) {
        Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI,
        uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;
        assert(V->getType() == IntTy &&
               "Wrong type for an alloca wide integer!");

      assert(NewBeginOffset == NewAllocaBeginOffset);
      assert(NewEndOffset == NewAllocaEndOffset);

      V = getIntegerSplat(II.getValue(),
                          DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);

    Value *NewPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
        IRB.CreateAlignedStore(V, NewPtr, NewAI.getAlign(), II.isVolatile());
    New->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                           LLVMContext::MD_access_group});

                      New, New->getPointerOperand(), V, DL);

    return !II.isVolatile();
  bool visitMemTransferInst(MemTransferInst &II) {
    AAMDNodes AATags = II.getAAMetadata();

    bool IsDest = &II.getRawDestUse() == OldUse;
    assert((IsDest && II.getRawDest() == OldPtr) ||
           (!IsDest && II.getRawSource() == OldPtr));

    Align SliceAlign = getSliceAlign();

    if (!IsSplittable) {
      Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
            DbgAssign->getAddress() == II.getDest())
          DbgAssign->replaceVariableLocationOp(II.getDest(), AdjustedPtr);
        II.setDest(AdjustedPtr);
        II.setDestAlignment(SliceAlign);
        II.setSource(AdjustedPtr);
        II.setSourceAlignment(SliceAlign);

      deleteIfTriviallyDead(OldPtr);

        (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
         SliceSize != DL.getTypeStoreSize(NewAllocaTy).getFixedValue() ||
         !DL.typeSizeEqualsStoreSize(NewAllocaTy) ||

    if (EmitMemCpy && &OldAI == &NewAI) {
      assert(NewBeginOffset == BeginOffset);

      if (NewEndOffset != EndOffset)
        II.setLength(NewEndOffset - NewBeginOffset);

      Pass.DeadInsts.push_back(&II);

    Value *OtherPtr = IsDest ? II.getRawSource() : II.getRawDest();
    if (AllocaInst *AI =
      assert(AI != &OldAI && AI != &NewAI &&
             "Splittable transfers cannot reach the same alloca on both ends.");
      Pass.Worklist.insert(AI);

    unsigned OffsetWidth = DL.getIndexSizeInBits(OtherAS);
    APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
        (IsDest ? II.getSourceAlign() : II.getDestAlign()).valueOrOne();
        commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());

      Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
      Type *SizeTy = II.getLength()->getType();
      Constant *Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);

      Value *DestPtr, *SrcPtr;
      MaybeAlign DestAlign, SrcAlign;
        DestAlign = SliceAlign;
        SrcAlign = OtherAlign;
        DestAlign = OtherAlign;
        SrcAlign = SliceAlign;

      CallInst *New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
        New->setAAMetadata(AATags.shift(NewBeginOffset - BeginOffset));

                          &II, New, DestPtr, nullptr, DL);
                          SliceSize * 8, &II, New, DestPtr, nullptr, DL);

    bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
                         NewEndOffset == NewAllocaEndOffset;
    uint64_t Size = NewEndOffset - NewBeginOffset;
    unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
    unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
    unsigned NumElements = EndIndex - BeginIndex;
    IntegerType *SubIntTy =
        IntTy ? Type::getIntNTy(IntTy->getContext(), Size * 8) : nullptr;

    if (VecTy && !IsWholeAlloca) {
      if (NumElements == 1)
        OtherTy = VecTy->getElementType();
    } else if (IntTy && !IsWholeAlloca) {
      OtherTy = NewAllocaTy;

    MaybeAlign SrcAlign = OtherAlign;
    MaybeAlign DstAlign = SliceAlign;

      DstPtr = getPtrToNewAI(II.getDestAddressSpace(), II.isVolatile());
      SrcPtr = getPtrToNewAI(II.getSourceAddressSpace(), II.isVolatile());

    if (VecTy && !IsWholeAlloca && !IsDest) {
          IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
    } else if (IntTy && !IsWholeAlloca && !IsDest) {
          IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(), "load");
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

      LoadInst *Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
                                             II.isVolatile(), "copyload");
      Load->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                              LLVMContext::MD_access_group});

    if (VecTy && !IsWholeAlloca && IsDest) {
      Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
    } else if (IntTy && !IsWholeAlloca && IsDest) {
      Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.getAlign(),
      uint64_t Offset = NewBeginOffset - NewAllocaBeginOffset;

        IRB.CreateAlignedStore(Src, DstPtr, DstAlign, II.isVolatile()));
    Store->copyMetadata(II, {LLVMContext::MD_mem_parallel_loop_access,
                             LLVMContext::MD_access_group});
                                                 Src->getType(), DL));

                      Store, DstPtr, Src, DL);
                        &II, Store, DstPtr, Src, DL);

    return !II.isVolatile();
  bool visitIntrinsicInst(IntrinsicInst &II) {
    assert((II.isLifetimeStartOrEnd() || II.isDroppable()) &&
           "Unexpected intrinsic!");

    Pass.DeadInsts.push_back(&II);

    if (II.isDroppable()) {
      assert(II.getIntrinsicID() == Intrinsic::assume && "Expected assume");

    assert(II.getArgOperand(0) == OldPtr);

    if (II.getIntrinsicID() == Intrinsic::lifetime_start)
      New = IRB.CreateLifetimeStart(Ptr);
      New = IRB.CreateLifetimeEnd(Ptr);
  void fixLoadStoreAlign(Instruction &Root) {
    SmallPtrSet<Instruction *, 4> Visited;
    SmallVector<Instruction *, 4> Uses;
    Uses.push_back(&Root);

        SI->setAlignment(std::min(SI->getAlign(), getSliceAlign()));

      for (User *U : I->users())
    } while (!Uses.empty());
  bool visitPHINode(PHINode &PN) {
    assert(BeginOffset >= NewAllocaBeginOffset && "PHIs are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "PHIs are unsplittable");

    IRBuilderBase::InsertPointGuard Guard(IRB);
                        OldPtr->getParent()->getFirstInsertionPt());
      IRB.SetInsertPoint(OldPtr);
    IRB.SetCurrentDebugLocation(OldPtr->getDebugLoc());

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(PN);

  bool visitSelectInst(SelectInst &SI) {
    assert((SI.getTrueValue() == OldPtr || SI.getFalseValue() == OldPtr) &&
           "Pointer isn't an operand!");
    assert(BeginOffset >= NewAllocaBeginOffset && "Selects are unsplittable");
    assert(EndOffset <= NewAllocaEndOffset && "Selects are unsplittable");

    Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->getType());
    if (SI.getOperand(1) == OldPtr)
      SI.setOperand(1, NewPtr);
    if (SI.getOperand(2) == OldPtr)
      SI.setOperand(2, NewPtr);

    deleteIfTriviallyDead(OldPtr);

    fixLoadStoreAlign(SI);
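// Pre-pass rewriter: splits aggregate (first-class struct/array) loads and
// stores into per-element operations and unfolds GEPs over selects and PHIs
// of pointers, so that the slice analysis only ever sees scalar accesses.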
class AggLoadStoreRewriter : public InstVisitor<AggLoadStoreRewriter, bool> {
  friend class InstVisitor<AggLoadStoreRewriter, bool>;

  SmallPtrSet<User *, 8> Visited;

  const DataLayout &DL;

  AggLoadStoreRewriter(const DataLayout &DL, IRBuilderTy &IRB)
      : DL(DL), IRB(IRB) {}

  bool rewrite(Instruction &I) {
    while (!Queue.empty()) {
      U = Queue.pop_back_val();

  void enqueueUsers(Instruction &I) {
    for (Use &U : I.uses())
      if (Visited.insert(U.getUser()).second)
        Queue.push_back(&U);

  bool visitInstruction(Instruction &I) { return false; }
  template <typename Derived> class OpSplitter {
    SmallVector<unsigned, 4> Indices;
    SmallVector<Value *, 4> GEPIndices;

    const DataLayout &DL;

    OpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
               Align BaseAlign, const DataLayout &DL, IRBuilderTy &IRB)
        : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
          BaseAlign(BaseAlign), DL(DL) {
      IRB.SetInsertPoint(InsertionPoint);

    void emitSplitOps(Type *Ty, Value *&Agg, const Twine &Name) {
      unsigned Offset = DL.getIndexedOffsetInType(BaseTy, GEPIndices);
      return static_cast<Derived *>(this)->emitFunc(

      unsigned OldSize = Indices.size();
      for (unsigned Idx = 0, Size = ATy->getNumElements(); Idx != Size;
        assert(Indices.size() == OldSize && "Did not return to the old size");
        GEPIndices.push_back(IRB.getInt32(Idx));
        emitSplitOps(ATy->getElementType(), Agg, Name + "." + Twine(Idx));

      unsigned OldSize = Indices.size();
      for (unsigned Idx = 0, Size = STy->getNumElements(); Idx != Size;
        assert(Indices.size() == OldSize && "Did not return to the old size");
        GEPIndices.push_back(IRB.getInt32(Idx));
        emitSplitOps(STy->getElementType(Idx), Agg, Name + "." + Twine(Idx));
  struct LoadOpSplitter : public OpSplitter<LoadOpSplitter> {
    SmallVector<Value *, 4> Components;

    LoadOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                   AAMDNodes AATags, Align BaseAlign, const DataLayout &DL,
        : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign, DL,

    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedLoad(Ty, GEP, Alignment, Name + ".load");
        Load->setAAMetadata(
      Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name + ".insert");

    void recordFakeUses(LoadInst &LI) {
      for (Use &U : LI.uses())
          if (II->getIntrinsicID() == Intrinsic::fake_use)

    void emitFakeUses() {
      for (Instruction *I : FakeUses) {
        IRB.SetInsertPoint(I);
        for (auto *V : Components)
          IRB.CreateIntrinsic(Intrinsic::fake_use, {V});
        I->eraseFromParent();

  bool visitLoadInst(LoadInst &LI) {
    Splitter.recordFakeUses(LI);
    Splitter.emitFakeUses();
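  /// OpSplitter specialization that extracts each leaf with extractvalue and
  /// emits one scalar store per leaf; debug-assignment metadata attached to
  /// the original aggregate store is migrated to the new element stores.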
  struct StoreOpSplitter : public OpSplitter<StoreOpSplitter> {
    StoreOpSplitter(Instruction *InsertionPoint, Value *Ptr, Type *BaseTy,
                    AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
                    const DataLayout &DL, IRBuilderTy &IRB)
        : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
          AATags(AATags), AggStore(AggStore) {}

    StoreInst *AggStore;

    void emitFunc(Type *Ty, Value *&Agg, Align Alignment, const Twine &Name) {
      Value *ExtractValue =
          IRB.CreateExtractValue(Agg, Indices, Name + ".extract");
      Value *InBoundsGEP =
          IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name + ".gep");
          IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);

        uint64_t SizeInBits =
            DL.getTypeSizeInBits(Store->getValueOperand()->getType());
            SizeInBits, AggStore, Store,
            Store->getPointerOperand(), Store->getValueOperand(),
               "AT: unexpected debug.assign linked to store through "

  bool visitStoreInst(StoreInst &SI) {
    if (!SI.isSimple() || SI.getPointerOperand() != *U)
    if (V->getType()->isSingleValueType())

    StoreOpSplitter Splitter(&SI, *U, V->getType(), SI.getAAMetadata(), &SI,
    Splitter.emitSplitOps(V->getType(), V, V->getName() + ".fca");

    SI.eraseFromParent();
  bool visitBitCastInst(BitCastInst &BC) {

  bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
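  /// Unfold a GEP whose pointer operand is a select, so each arm gets its own
  /// GEP and the select happens on the results. In outline:
  ///
  ///   %p = select i1 %c, ptr %a, ptr %b
  ///   %g = getelementptr i32, ptr %p, i64 1
  ///
  /// becomes
  ///
  ///   %ga = getelementptr i32, ptr %a, i64 1
  ///   %gb = getelementptr i32, ptr %b, i64 1
  ///   %g  = select i1 %c, ptr %ga, ptr %gb
  ///
  /// which lets slices of %a and %b be analyzed independently.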
  bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
      if (!ZI->getSrcTy()->isIntegerTy(1))

      dbgs() << " original: " << *Sel << "\n";
      dbgs() << " " << GEPI << "\n";);

    auto GetNewOps = [&](Value *SelOp) {
        Cond = SI->getCondition();
        True = SI->getTrueValue();
        False = SI->getFalseValue();
        Cond = Sel->getOperand(0);
        True = ConstantInt::get(Sel->getType(), 1);
        False = ConstantInt::get(Sel->getType(), 0);

    IRB.SetInsertPoint(&GEPI);

    Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0], ArrayRef(TrueOps).drop_front(),
                                 True->getName() + ".sroa.gep", NW);
        IRB.CreateGEP(Ty, FalseOps[0], ArrayRef(FalseOps).drop_front(),
                      False->getName() + ".sroa.gep", NW);

    Value *NSel = MDFrom
                      ? IRB.CreateSelect(Cond, NTrue, NFalse,
                                         Sel->getName() + ".sroa.sel", MDFrom)
                      : IRB.CreateSelectWithUnknownProfile(
                            Sel->getName() + ".sroa.sel");
    Visited.erase(&GEPI);
    enqueueUsers(*NSelI);

      dbgs() << " " << *NFalse << "\n";
      dbgs() << " " << *NSel << "\n";);
  bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
    auto IsInvalidPointerOperand = [](Value *V) {
        return !AI->isStaticAlloca();

    if (any_of(Phi->operands(), IsInvalidPointerOperand))
               [](Value *V) { return isa<ConstantInt>(V); }))

      dbgs() << " original: " << *Phi << "\n";
      dbgs() << " " << GEPI << "\n";);

    auto GetNewOps = [&](Value *PhiOp) {

    IRB.SetInsertPoint(Phi);
    PHINode *NewPhi = IRB.CreatePHI(GEPI.getType(), Phi->getNumIncomingValues(),
                                    Phi->getName() + ".sroa.phi");

    for (unsigned I = 0, E = Phi->getNumIncomingValues(); I != E; ++I) {
          IRB.CreateGEP(SourceTy, NewOps[0], ArrayRef(NewOps).drop_front(),

    Visited.erase(&GEPI);
    enqueueUsers(*NewPhi);

      dbgs() << "\n " << *NewPhi << '\n');

  bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    if (unfoldGEPSelect(GEPI))

    if (unfoldGEPPhi(GEPI))

  bool visitPHINode(PHINode &PN) {

  bool visitSelectInst(SelectInst &SI) {
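// The helpers below (stripAggregateTypeWrapping and getTypePartition, shown
// only in part here) peel single-element aggregate wrappers and try to find a
// sub-type of the allocated type that exactly covers a given byte range, so a
// partition can be rewritten with a real struct/array/vector type rather than
// a plain integer blob.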
  if (Ty->isSingleValueType())

  uint64_t AllocSize = DL.getTypeAllocSize(Ty).getFixedValue();
    InnerTy = ArrTy->getElementType();
    InnerTy = STy->getElementType(Index);

  if (AllocSize > DL.getTypeAllocSize(InnerTy).getFixedValue() ||
      TypeSize > DL.getTypeSizeInBits(InnerTy).getFixedValue())

  if (Offset == 0 && DL.getTypeAllocSize(Ty).getFixedValue() == Size)
  if (Offset > DL.getTypeAllocSize(Ty).getFixedValue() ||
      (DL.getTypeAllocSize(Ty).getFixedValue() - Offset) < Size)

    ElementTy = AT->getElementType();
    TyNumElements = AT->getNumElements();
    ElementTy = VT->getElementType();
    TyNumElements = VT->getNumElements();
  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  if (NumSkippedElements >= TyNumElements)
  Offset -= NumSkippedElements * ElementSize;

  if (Size == ElementSize)
  if (NumElements * ElementSize != Size)

  uint64_t ElementSize = DL.getTypeAllocSize(ElementTy).getFixedValue();
  if (Offset >= ElementSize)
  if (Size == ElementSize)

  if (Index == EndIndex)
  assert(Index < EndIndex);
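/// Pre-split loads and stores whose split points disagree with the partition
/// boundaries. When one slice is a load and another slice stores that loaded
/// value elsewhere, the two accesses must be split at the same offsets or
/// neither alloca can be promoted; this routine splits such loads and stores
/// into smaller integer accesses that line up with the partition boundaries.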
bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
  struct SplitOffsets {
    std::vector<uint64_t> Splits;
  SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;

  SmallPtrSet<LoadInst *, 8> UnsplittableLoads;

  LLVM_DEBUG(dbgs() << " Searching for candidate loads and stores\n");
  for (auto &P : AS.partitions()) {
    for (Slice &S : P) {
      if (!S.isSplittable() || S.endOffset() <= P.endOffset()) {
          UnsplittableLoads.insert(LI);
          UnsplittableLoads.insert(LI);
      assert(P.endOffset() > S.beginOffset() &&
             "Empty or backwards partition!");

        auto IsLoadSimplyStored = [](LoadInst *LI) {
          for (User *LU : LI->users()) {
            if (!SI || !SI->isSimple())
        if (!IsLoadSimplyStored(LI)) {
          UnsplittableLoads.insert(LI);

        if (S.getUse() != &SI->getOperandUse(SI->getPointerOperandIndex()))
        if (!StoredLoad || !StoredLoad->isSimple())
        assert(!SI->isVolatile() && "Cannot split volatile stores!");

      auto &Offsets = SplitOffsetsMap[I];
             "Should not have splits the first time we see an instruction!");
      Offsets.Splits.push_back(P.endOffset() - S.beginOffset());

    for (Slice *S : P.splitSliceTails()) {
      auto SplitOffsetsMapI =
      if (SplitOffsetsMapI == SplitOffsetsMap.end())
      auto &Offsets = SplitOffsetsMapI->second;
             "Cannot have an empty set of splits on the second partition!");
                 P.beginOffset() - Offsets.S->beginOffset() &&
             "Previous split does not end where this one begins!");

      if (S->endOffset() > P.endOffset())

  llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
    if (UnsplittableLoads.count(LI))

    auto LoadOffsetsI = SplitOffsetsMap.find(LI);
    if (LoadOffsetsI == SplitOffsetsMap.end())
    auto &LoadOffsets = LoadOffsetsI->second;
    auto &StoreOffsets = SplitOffsetsMap[SI];

    if (LoadOffsets.Splits == StoreOffsets.Splits)
               << " " << *LI << "\n"
               << " " << *SI << "\n");

    UnsplittableLoads.insert(LI);

    return UnsplittableLoads.count(LI);
    return UnsplittableLoads.count(LI);
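  // At this point the remaining Loads and Stores are the candidates we will
  // actually pre-split. Loads are handled first: each is replaced by a run of
  // smaller integer loads at the recorded split offsets, and stores of those
  // loads that do not themselves need splitting are rewritten right away.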
  IRBuilderTy IRB(&AI);

  SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;

  SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
  std::vector<LoadInst *> SplitLoads;
  const DataLayout &DL = AI.getDataLayout();
  for (LoadInst *LI : Loads) {
    auto &Offsets = SplitOffsetsMap[LI];
    unsigned SliceSize = Offsets.S->endOffset() - Offsets.S->beginOffset();
           "Load must have type size equal to store size");
           "Load must be >= slice size");

    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + SliceSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    IRB.SetInsertPoint(LI);

    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
      auto *PartTy = Type::getIntNTy(LI->getContext(), PartSize * 8);
      LoadInst *PLoad = IRB.CreateAlignedLoad(
                         APInt(DL.getIndexSizeInBits(AS), PartOffset),
                         PartPtrTy, BasePtr->getName() + "."),
      PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                LLVMContext::MD_access_group});

      SplitLoads.push_back(PLoad);

          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                 << ", " << NewSlices.back().endOffset()
                 << "): " << *PLoad << "\n");

        PartOffset = Offsets.Splits[Idx];
        PartSize = (Idx < Size ? Offsets.Splits[Idx] : SliceSize) - PartOffset;

    bool DeferredStores = false;
    for (User *LU : LI->users()) {
      if (!Stores.empty() && SplitOffsetsMap.count(SI)) {
        DeferredStores = true;

      Value *StoreBasePtr = SI->getPointerOperand();
      IRB.SetInsertPoint(SI);
      AAMDNodes AATags = SI->getAAMetadata();

      LLVM_DEBUG(dbgs() << " Splitting store of load: " << *SI << "\n");

      for (int Idx = 0, Size = SplitLoads.size(); Idx < Size; ++Idx) {
        LoadInst *PLoad = SplitLoads[Idx];
        uint64_t PartOffset = Idx == 0 ? 0 : Offsets.Splits[Idx - 1];
        auto *PartPtrTy = SI->getPointerOperandType();

        auto AS = SI->getPointerAddressSpace();
        StoreInst *PStore = IRB.CreateAlignedStore(
                             APInt(DL.getIndexSizeInBits(AS), PartOffset),
                             PartPtrTy, StoreBasePtr->getName() + "."),
        PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                   LLVMContext::MD_access_group,
                                   LLVMContext::MD_DIAssignID});

        LLVM_DEBUG(dbgs() << " +" << PartOffset << ":" << *PStore << "\n");

          ResplitPromotableAllocas.insert(OtherAI);
          Worklist.insert(OtherAI);
          Worklist.insert(OtherAI);

      DeadInsts.push_back(SI);

      SplitLoadsMap.insert(std::make_pair(LI, std::move(SplitLoads)));

    DeadInsts.push_back(LI);
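  // Second pass: split the stores whose stored value is a (possibly already
  // split) load. Where split loads exist they are reused; otherwise new
  // partial loads are emitted next to the original load.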
  for (StoreInst *SI : Stores) {
    assert(StoreSize > 0 && "Cannot have a zero-sized integer store!");
           "Slice size should always match load size exactly!");
    uint64_t BaseOffset = Offsets.S->beginOffset();
    assert(BaseOffset + StoreSize > BaseOffset &&
           "Cannot represent alloca access size using 64-bit integers!");

    auto SplitLoadsMapI = SplitLoadsMap.find(LI);
    std::vector<LoadInst *> *SplitLoads = nullptr;
    if (SplitLoadsMapI != SplitLoadsMap.end()) {
      SplitLoads = &SplitLoadsMapI->second;
             "Too few split loads for the number of splits in the store!");

    uint64_t PartOffset = 0, PartSize = Offsets.Splits.front();
      auto *PartTy = Type::getIntNTy(Ty->getContext(), PartSize * 8);
      auto *StorePartPtrTy = SI->getPointerOperandType();

        PLoad = (*SplitLoads)[Idx];
        IRB.SetInsertPoint(LI);
        PLoad = IRB.CreateAlignedLoad(
                             APInt(DL.getIndexSizeInBits(AS), PartOffset),
                             LoadPartPtrTy, LoadBasePtr->getName() + "."),
        PLoad->copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
                                  LLVMContext::MD_access_group});

      IRB.SetInsertPoint(SI);
      auto AS = SI->getPointerAddressSpace();
      StoreInst *PStore = IRB.CreateAlignedStore(
                             APInt(DL.getIndexSizeInBits(AS), PartOffset),
                             StorePartPtrTy, StoreBasePtr->getName() + "."),
      PStore->copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
                                 LLVMContext::MD_access_group});

          Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
                 << ", " << NewSlices.back().endOffset()
                 << "): " << *PStore << "\n");

        PartOffset = Offsets.Splits[Idx];
        PartSize = (Idx < Size ? Offsets.Splits[Idx] : StoreSize) - PartOffset;

      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      ResplitPromotableAllocas.insert(OtherAI);
      Worklist.insert(OtherAI);
      assert(OtherAI != &AI && "We can't re-split our own alloca!");
      Worklist.insert(OtherAI);

    DeadInsts.push_back(LI);
    DeadInsts.push_back(SI);

  AS.insert(NewSlices);

  for (auto I = AS.begin(), E = AS.end(); I != E; ++I)

  PromotableAllocas.set_subtract(ResplitPromotableAllocas);
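/// Choose the type used to rewrite an alloca partition. The returned tuple is
/// consumed as (partition type, integer-widening viable, vector type for
/// vector promotion); in rough terms it prefers a viable vector type, then a
/// sub-type or common use type of the partition, then a legal integer
/// covering the partition size.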
static std::tuple<Type *, bool, VectorType *>
  if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
      VecTy->getElementCount().getFixedValue() > 1)
    return {VecTy, false, VecTy};

  auto [CommonUseTy, LargestIntTy] =
  TypeSize CommonUseSize = DL.getTypeAllocSize(CommonUseTy);
      return {VecTy, false, VecTy};

                                                 P.beginOffset(), P.size())) {
    if (TypePartitionTy->isArrayTy() &&
        TypePartitionTy->getArrayElementType()->isIntegerTy() &&
        DL.isLegalInteger(P.size() * 8))
        return {TypePartitionTy, true, nullptr};
        return {VecTy, false, VecTy};
        DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size() &&
      return {LargestIntTy, true, nullptr};
    return {TypePartitionTy, false, nullptr};

      DL.getTypeAllocSize(LargestIntTy).getFixedValue() >= P.size())
    return {LargestIntTy, false, nullptr};

  if (DL.isLegalInteger(P.size() * 8))
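/// Rewrite one partition of an alloca: pick a type for it, create a new
/// (smaller) alloca unless the partition covers the whole original alloca
/// with its allocated type, rewrite every slice's uses against the new
/// alloca, and decide whether the result is promotable (possibly after
/// speculating PHIs and selects). Returns the new alloca and the bit width of
/// the type it was rewritten to.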
std::pair<AllocaInst *, uint64_t>
SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &P) {
  const DataLayout &DL = AI.getDataLayout();
  auto [PartitionTy, IsIntegerWideningViable, VecTy] =

  if (PartitionTy == AI.getAllocatedType() && P.beginOffset() == 0) {
    const bool IsUnconstrained = Alignment <= DL.getABITypeAlign(PartitionTy);
    NewAI = new AllocaInst(
        PartitionTy, AI.getAddressSpace(), nullptr,
        IsUnconstrained ? DL.getPrefTypeAlign(PartitionTy) : Alignment,
        AI.getName() + ".sroa." + Twine(P.begin() - AS.begin()),

  LLVM_DEBUG(dbgs() << "Rewriting alloca partition " << "[" << P.beginOffset()
                    << "," << P.endOffset() << ") to: " << *NewAI << "\n");

  unsigned PPWOldSize = PostPromotionWorklist.size();
  unsigned NumUses = 0;
  SmallSetVector<PHINode *, 8> PHIUsers;
  SmallSetVector<SelectInst *, 8> SelectUsers;

      DL, AS, *this, AI, *NewAI, PartitionTy, P.beginOffset(), P.endOffset(),
      IsIntegerWideningViable, VecTy, PHIUsers, SelectUsers);
  bool Promotable = true;
  if (auto DeletedValues = Rewriter.rewriteTreeStructuredMerge(P)) {
    NumUses += DeletedValues->size() + 1;
    for (Value *V : *DeletedValues)
      DeadInsts.push_back(V);
    for (Slice *S : P.splitSliceTails()) {
    for (Slice &S : P) {

  NumAllocaPartitionUses += NumUses;
  MaxUsesPerAllocaPartition.updateMax(NumUses);

  for (PHINode *PHI : PHIUsers)
      SelectUsers.clear();

      NewSelectsToRewrite;
  for (SelectInst *Sel : SelectUsers) {
    std::optional<RewriteableMemOps> Ops =
      SelectUsers.clear();
      NewSelectsToRewrite.clear();

  for (Use *U : AS.getDeadUsesIfPromotable()) {
    Value::dropDroppableUse(*U);
      DeadInsts.push_back(OldInst);
  if (PHIUsers.empty() && SelectUsers.empty()) {
    PromotableAllocas.insert(NewAI);
    SpeculatablePHIs.insert_range(PHIUsers);
    SelectsToRewrite.reserve(SelectsToRewrite.size() +
                             NewSelectsToRewrite.size());
             std::make_move_iterator(NewSelectsToRewrite.begin()),
             std::make_move_iterator(NewSelectsToRewrite.end())))
      SelectsToRewrite.insert(std::move(KV));
    Worklist.insert(NewAI);

    while (PostPromotionWorklist.size() > PPWOldSize)
      PostPromotionWorklist.pop_back();

    return {nullptr, 0};

    Worklist.insert(NewAI);

  return {NewAI, DL.getTypeSizeInBits(PartitionTy).getFixedValue()};
                                              int64_t BitExtractOffset) {
  bool HasFragment = false;
  bool HasBitExtract = false;
      HasBitExtract = true;
      int64_t ExtractOffsetInBits = Op.getArg(0);
      int64_t ExtractSizeInBits = Op.getArg(1);

      assert(BitExtractOffset <= 0);
      int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
      if (AdjustedOffset < 0)

      Ops.push_back(Op.getOp());
      Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
      Ops.push_back(ExtractSizeInBits);
    Op.appendToVector(Ops);

  if (HasFragment && HasBitExtract)

  if (!HasBitExtract) {

    std::optional<DIExpression::FragmentInfo> NewFragment,
    int64_t BitExtractAdjustment) {
                                               BitExtractAdjustment);
  if (!NewFragmentExpr)

    BeforeInst->getParent()->insertDbgRecordBefore(DVR,
    BeforeInst->getParent()->insertDbgRecordBefore(DVR,
  if (!NewAddr->hasMetadata(LLVMContext::MD_DIAssignID)) {
  LLVM_DEBUG(dbgs() << "Created new DVRAssign: " << *NewAssign << "\n");
bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
  if (AS.begin() == AS.end())

  unsigned NumPartitions = 0;
  const DataLayout &DL = AI.getModule()->getDataLayout();

  Changed |= presplitLoadsAndStores(AI, AS);

  bool IsSorted = true;

  uint64_t AllocaSize = AI.getAllocationSize(DL)->getFixedValue();
  const uint64_t MaxBitVectorSize = 1024;
  if (AllocaSize <= MaxBitVectorSize) {
    SmallBitVector SplittableOffset(AllocaSize + 1, true);
      for (unsigned O = S.beginOffset() + 1;
           O < S.endOffset() && O < AllocaSize; O++)
        SplittableOffset.reset(O);

    for (Slice &S : AS) {
      if (!S.isSplittable())
      if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
          (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
      S.makeUnsplittable();

    for (Slice &S : AS) {
      if (!S.isSplittable())
      if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
      S.makeUnsplittable();

    Fragment(AllocaInst *AI, uint64_t O, uint64_t S)

  for (auto &P : AS.partitions()) {
    auto [NewAI, ActiveBits] = rewritePartition(AI, AS, P);
      uint64_t SizeOfByte = 8;
      uint64_t Size = std::min(ActiveBits, P.size() * SizeOfByte);
      Fragments.push_back(
          Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));

  NumAllocaPartitions += NumPartitions;
  MaxPartitionsPerAlloca.updateMax(NumPartitions);

  auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
    const Value *DbgPtr = DbgVariable->getAddress();
        DbgVariable->getFragmentOrEntireVariable();

    int64_t CurrentExprOffsetInBytes = 0;
    SmallVector<uint64_t> PostOffsetOps;
             ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))

    int64_t ExtractOffsetInBits = 0;
        ExtractOffsetInBits = Op.getArg(0);

    DIBuilder DIB(*AI.getModule(), false);
    for (auto Fragment : Fragments) {
      int64_t OffsetFromLocationInBits;
      std::optional<DIExpression::FragmentInfo> NewDbgFragment;
              DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
              CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
              NewDbgFragment, OffsetFromLocationInBits))

      if (NewDbgFragment && !NewDbgFragment->SizeInBits)

      if (!NewDbgFragment)
        NewDbgFragment = DbgVariable->getFragment();

      int64_t OffestFromNewAllocaInBits =
          OffsetFromLocationInBits - ExtractOffsetInBits;
      int64_t BitExtractOffset =
          std::min<int64_t>(0, OffestFromNewAllocaInBits);
      OffestFromNewAllocaInBits =
          std::max(int64_t(0), OffestFromNewAllocaInBits);

      DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
      if (OffestFromNewAllocaInBits > 0) {
        int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;

      auto RemoveOne = [DbgVariable](auto *OldDII) {
        auto SameVariableFragment = [](const auto *LHS, const auto *RHS) {
          return LHS->getVariable() == RHS->getVariable() &&
                 LHS->getDebugLoc()->getInlinedAt() ==
                     RHS->getDebugLoc()->getInlinedAt();
        if (SameVariableFragment(OldDII, DbgVariable))
          OldDII->eraseFromParent();

                     NewDbgFragment, BitExtractOffset);
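/// clobberUse overwrites a dead use so its old operand no longer appears
/// used, queueing the operand for deletion if that makes it trivially dead.
/// propagateStoredValuesToLoads handles allocas that escape only through
/// read-only captures: when every access to a slice range is a simple load or
/// store of one common type, stored values are forwarded straight to the
/// loads with SSAUpdater instead of splitting the alloca.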
void SROA::clobberUse(Use &U) {
    DeadInsts.push_back(OldI);

bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
  LLVM_DEBUG(dbgs() << "Attempting to propagate values on " << AI << "\n");
  bool AllSameAndValid = true;
  Type *PartitionType = nullptr;
  uint64_t BeginOffset = 0;
  uint64_t EndOffset = 0;

  auto Flush = [&]() {
    if (AllSameAndValid && !Insts.empty()) {
      LLVM_DEBUG(dbgs() << "Propagate values on slice [" << BeginOffset << ", "
                        << EndOffset << ")\n");
      SSAUpdater SSA(&NewPHIs);
      BasicLoadAndStorePromoter Promoter(Insts, SSA, PartitionType);
      Promoter.run(Insts);
    AllSameAndValid = true;
    PartitionType = nullptr;

  for (Slice &S : AS) {
      dbgs() << "Ignoring slice: ";
      AS.print(dbgs(), &S);
    if (S.beginOffset() >= EndOffset) {
      BeginOffset = S.beginOffset();
      EndOffset = S.endOffset();
    } else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
      if (AllSameAndValid) {
          dbgs() << "Slice does not match range [" << BeginOffset << ", "
                 << EndOffset << ")";
          AS.print(dbgs(), &S);
        AllSameAndValid = false;
      EndOffset = std::max(EndOffset, S.endOffset());
      if (!LI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
      Type *UserTy = SI->getValueOperand()->getType();
      if (!SI->isSimple() || (PartitionType && UserTy != PartitionType))
        AllSameAndValid = false;
      PartitionType = UserTy;
      AllSameAndValid = false;
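/// Analyze a single alloca and rewrite it if possible. Trivially dead,
/// unsized, scalable, or array allocas are skipped; aggregate loads and
/// stores are first scalarized by AggLoadStoreRewriter; the alloca is sliced;
/// dead users are clobbered; and the slices are either propagated (the
/// escaped read-only case) or split into new allocas. Finally, speculatable
/// PHIs and rewriteable selects recorded during splitting are processed.
/// Returns {Changed, CFGChanged}.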
std::pair<bool, bool> SROA::runOnAlloca(AllocaInst &AI) {
  bool CFGChanged = false;

  ++NumAllocasAnalyzed;

  if (AI.use_empty()) {
    AI.eraseFromParent();

  const DataLayout &DL = AI.getDataLayout();
  std::optional<TypeSize> Size = AI.getAllocationSize(DL);
  if (AI.isArrayAllocation() || !Size || Size->isScalable() || Size->isZero())

  IRBuilderTy IRB(&AI);
  AggLoadStoreRewriter AggRewriter(DL, IRB);
  Changed |= AggRewriter.rewrite(AI);

  AllocaSlices AS(DL, AI);

  if (AS.isEscapedReadOnly()) {
    Changed |= propagateStoredValuesToLoads(AI, AS);

  for (auto &P : AS.partitions()) {
    std::optional<Value *> ProtectedFieldDisc;
    auto SliceHasMismatch = [&](Slice &S) {
      if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
          II->getIntrinsicID() == Intrinsic::lifetime_end)
      if (!ProtectedFieldDisc)
        ProtectedFieldDisc = S.ProtectedFieldDisc;
      return *ProtectedFieldDisc != S.ProtectedFieldDisc;
    if (SliceHasMismatch(S))
    for (Slice *S : P.splitSliceTails())
      if (SliceHasMismatch(*S))

  for (Instruction *DeadUser : AS.getDeadUsers()) {
    for (Use &DeadOp : DeadUser->operands())
    DeadInsts.push_back(DeadUser);

  for (Use *DeadOp : AS.getDeadOperands()) {
    clobberUse(*DeadOp);

  for (IntrinsicInst *PFPUser : AS.getPFPUsers()) {
    PFPUser->replaceAllUsesWith(PFPUser->getArgOperand(0));
    DeadInsts.push_back(PFPUser);

  if (AS.begin() == AS.end())

  Changed |= splitAlloca(AI, AS);

  while (!SpeculatablePHIs.empty())

  auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
  while (!RemainingSelectsToRewrite.empty()) {
    const auto [K, V] = RemainingSelectsToRewrite.pop_back_val();
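/// Drain DeadInsts, erasing each instruction, recording any alloca that dies
/// in DeletedAllocas, and re-queueing operands that become trivially dead as
/// a result.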
bool SROA::deleteDeadInstructions(
    SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
  while (!DeadInsts.empty()) {
      DeletedAllocas.insert(AI);
        OldDII->eraseFromParent();

    for (Use &Operand : I->operands())
        DeadInsts.push_back(U);

    I->eraseFromParent();

bool SROA::promoteAllocas() {
  if (PromotableAllocas.empty())

  NumPromoted += PromotableAllocas.size();
  PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
  PromotableAllocas.clear();
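/// Driver: seed the worklists with allocas from the entry block, iterate
/// runOnAlloca and deleteDeadInstructions to a fixed point, promote whatever
/// ended up in PromotableAllocas with mem2reg, and report
/// {Changed, CFGChanged}.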
std::pair<bool, bool> SROA::runSROA(Function &F) {
  const DataLayout &DL = F.getDataLayout();
    std::optional<TypeSize> Size = AI->getAllocationSize(DL);
      PromotableAllocas.insert(AI);
      Worklist.insert(AI);

  bool CFGChanged = false;
  SmallPtrSet<AllocaInst *, 4> DeletedAllocas;

    while (!Worklist.empty()) {
      auto [IterationChanged, IterationCFGChanged] =
          runOnAlloca(*Worklist.pop_back_val());
      CFGChanged |= IterationCFGChanged;

      Changed |= deleteDeadInstructions(DeletedAllocas);

      if (!DeletedAllocas.empty()) {
        Worklist.set_subtract(DeletedAllocas);
        PostPromotionWorklist.set_subtract(DeletedAllocas);
        PromotableAllocas.set_subtract(DeletedAllocas);
        DeletedAllocas.clear();

    Worklist = PostPromotionWorklist;
    PostPromotionWorklist.clear();
  } while (!Worklist.empty());

  assert((!CFGChanged || Changed) && "Can not only modify the CFG.");
         "Should not have modified the CFG when told to preserve it.");

  for (auto &BB : F) {

      SROA(&F.getContext(), &DTU, &AC, PreserveCFG).runSROA(F);

                   OS, MapClassName2PassName);

  if (skipFunction(F))

  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
      getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);

  void getAnalysisUsage(AnalysisUsage &AU) const override {

  StringRef getPassName() const override { return "SROA"; }

char SROALegacyPass::ID = 0;

                "Scalar Replacement Of Aggregates", false, false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
print mir2vec MIR2Vec Vocabulary Printer Pass
This file implements a map that provides insertion order iteration.
static std::optional< AllocFnsTy > getAllocationSize(const CallBase *CB, const TargetLibraryInfo *TLI)
uint64_t IntrinsicInst * II
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
void visit(MachineFunction &MF, MachineBasicBlock &Start, std::function< void(MachineBasicBlock *)> op)
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL)
Find linked dbg.assign and generate a new one with the correct FragmentInfo.
static std::tuple< Type *, bool, VectorType * > selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI, LLVMContext &C)
Select a partition type for an alloca partition.
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL, unsigned VScale)
Test whether the given alloca partitioning and range of slices can be promoted to a vector.
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset)
Compute the adjusted alignment for a load or store from an offset.
static VectorType * checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy, unsigned VScale)
Test whether any vector type in CandidateTys is viable for promotion.
static std::pair< Type *, IntegerType * > findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices.
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional< DIExpression::FragmentInfo > StorageFragment, std::optional< DIExpression::FragmentInfo > CurrentFragment, DIExpression::FragmentInfo &Target)
static DIExpression * createOrReplaceFragment(const DIExpression *Expr, DIExpression::FragmentInfo Frag, int64_t BitExtractOffset)
Create or replace an existing fragment in a DIExpression with Frag.
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL, unsigned VScale)
Test whether the given slice use can be promoted to a vector.
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
static Value * foldSelectInst(SelectInst &SI)
bool isKillAddress(const DbgVariableRecord *DVR)
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale)
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR)
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional< DIExpression::FragmentInfo > NewFragment, int64_t BitExtractAdjustment)
Insert a new DbgRecord.
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
static Value * mergeTwoVectors(Value *V0, Value *V1, const DataLayout &DL, Type *NewAIEltTy, IRBuilder<> &Builder)
This function takes two vector values and combines them into a single vector by concatenating their e...
const DIExpression * getAddressExpression(const DbgVariableRecord *DVR)
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, unsigned VScale=0)
Test whether we can convert a value from the old to the new type.
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
static Value * convertValue(const DataLayout &DL, IRBuilderTy &IRB, Value *V, Type *NewTy)
Generic routine to convert an SSA value to a value of a different type.
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Builder for the alloca slices.
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
An iterator over partitions of the alloca's slices.
bool operator==(const partition_iterator &RHS) const
friend class AllocaSlices
partition_iterator & operator++()
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Represents analyses that only rely on functions' control flow.
LLVM_ABI CaptureInfo getCaptureInfo(unsigned OpNo) const
Return which pointer components this operand may capture.
bool onlyReadsMemory(unsigned OpNo) const
bool isDataOperand(const Use *U) const
This is the shared class of boolean and integer constants.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static DIAssignID * getDistinct(LLVMContext &Context)
LLVM_ABI DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
LLVM_ABI bool startsWithDeref() const
Return whether the first element a DW_OP_deref.
static LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value or/and an ...
A parsed version of the target data layout string in and methods for querying it.
LLVM_ABI void moveBefore(DbgRecord *MoveBefore)
DebugLoc getDebugLoc() const
void setDebugLoc(DebugLoc Loc)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI void setKillAddress()
Kill the address component.
LLVM_ABI bool isKillLocation() const
LocationType getType() const
LLVM_ABI bool isKillAddress() const
Check whether this kills the address component.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Value * getValue(unsigned OpIdx=0) const
static LLVM_ABI DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
LLVM_ABI void setAssignId(DIAssignID *New)
DIExpression * getExpression() const
static LLVM_ABI DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static LLVM_ABI DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DILocalVariable * getVariable() const
LLVM_ABI void setKillLocation()
bool isDbgDeclare() const
void setAddress(Value *V)
DIExpression * getAddressExpression() const
LLVM_ABI DILocation * getInlinedAt() const
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
lookup - Return the entry for the specified key, or a default constructed value if no such entry exis...
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
unsigned getVScaleValue() const
Return the value for vscale based on the vscale_range attribute or 0 when unknown.
const BasicBlock & getEntryBlock() const
LLVM_ABI bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Value * getPointerOperand()
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
LLVM_ABI GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVMContext & getContext() const
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
This is the common base class for memset/memcpy/memmove.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
PtrUseVisitor(const DataLayout &DL)
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
SROAPass(SROAOptions PreserveCFG)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
Helper class for SSA formation on a set of values defined in multiple blocks.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Value * getValueOperand()
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
StringRef - Represent a constant reference to a string, i.e.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getSizeInBytes() const
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
TypeSize getSizeInBits() const
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
element_iterator element_end() const
element_iterator element_begin() const
Type * getElementType(unsigned N) const
Type::subtype_iterator element_iterator
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
bool isTargetExtTy() const
Return true if this is a target extension type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
iterator_range< user_iterator > users()
LLVM_ABI void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
static VectorType * getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy)
This static method attempts to construct a VectorType with the same size-in-bits as SizeTy but with a...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock.
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool operator<(int64_t V1, const APSInt &V2)
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
bool operator!=(uint64_t V1, const APInt &V2)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
auto dyn_cast_or_null(const Y &Val)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool capturesFullProvenance(CaptureComponents CC)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
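The companion legality query, again as a hedged sketch with illustrative names: a load of a select can be speculated above the select only if both incoming pointers are known safe to load from at that point; otherwise a predicated rewrite like the one sketched above is needed.
#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
using namespace llvm;

static bool canSpeculateLoadOfSelect(LoadInst &Load, SelectInst &Sel) {
  const DataLayout &DL = Load.getModule()->getDataLayout();
  // Assumes a fixed-size (non-scalable) loaded type for simplicity.
  APInt Size(64, DL.getTypeStoreSize(Load.getType()).getFixedValue());
  return isSafeToLoadUnconditionally(Sel.getTrueValue(), Load.getAlign(), Size,
                                     DL, &Load) &&
         isSafeToLoadUnconditionally(Sel.getFalseValue(), Load.getAlign(), Size,
                                     DL, &Load);
}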
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
Finds the DbgVariableRecords (the record form of dbg.value) that describe the value 'V'.
LLVM_ABI void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
constexpr int PoisonMaskElem
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which is equivalent to C.erase(remove_if(C, pred), C.end());
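A tiny hedged example of erase_if on an LLVM container; the contents are illustrative.
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/IR/Value.h"
using namespace llvm;

// Drop null entries in place.
static void dropNulls(SmallVectorImpl<Value *> &Vals) {
  erase_if(Vals, [](const Value *V) { return V == nullptr; });
}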
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
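A worked, hedged example of the offset form: a base pointer aligned to 8 that is accessed 4 bytes in can only be assumed 4-byte aligned.
#include "llvm/Support/Alignment.h"
#include <cstdint>
using namespace llvm;

// commonAlignment(Align(8), 4) yields Align(4).
static Align sliceAlignment(Align BaseAlign, uint64_t SliceOffset) {
  return commonAlignment(BaseAlign, SliceOffset);
}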
cl::opt< bool > ProfcheckDisableMetadataFixes("profcheck-disable-metadata-fixes", cl::Hidden, cl::init(false), cl::desc("Disable metadata propagation fixes discovered through Issue #147390"))
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction: everything before SplitBefore stays in the original block, the rest is moved to a new block, and a new ThenBlock guarded by Cond is inserted between them.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
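A brief, hedged example; the loop bound is illustrative.
#include "llvm/ADT/Sequence.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;

// Prints 0 .. NumParts-1; the end bound is exclusive.
static void printPartitionIndices(unsigned NumParts) {
  for (unsigned Idx : seq(0u, NumParts))
    errs() << Idx << "\n";
}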
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createSROAPass(bool PreserveCFG=true)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analysis infrastructure.
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of the pointer.
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
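A hedged sketch of how these two helpers compose when an access is split; the offset and size below are illustrative.
#include "llvm/IR/Metadata.h"
using namespace llvm;

// Alias metadata for a 4-byte access that begins 8 bytes into the access
// originally described by Orig.
static AAMDNodes aaForSlice(const AAMDNodes &Orig) {
  AAMDNodes Shifted = Orig.shift(8); // account for the byte offset
  return Shifted.adjustForAccess(4); // narrow to the new access size
}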
Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Bitfield::Element
Describes an element of a Bitfield.
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
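A minimal, hedged sketch of packing two single-bit flags into one byte with these helpers; the field names are illustrative, not taken from this file.
#include "llvm/ADT/Bitfields.h"
using namespace llvm;

using TrueBit = Bitfield::Element<bool, 0, 1>;  // bit 0
using FalseBit = Bitfield::Element<bool, 1, 1>; // bit 1

static void markHand(unsigned char &Storage, bool IsTrueHand) {
  if (IsTrueHand)
    Bitfield::set<TrueBit>(Storage, true);
  else
    Bitfield::set<FalseBit>(Storage, true);
}

static bool anyHandMarked(unsigned char Storage) {
  return Bitfield::get<TrueBit>(Storage) || Bitfield::get<FalseBit>(Storage);
}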
PassInfoMixin
A CRTP mix-in to automatically provide informational APIs needed for passes.