102#define DEBUG_TYPE "sroa"
// Pass statistics. Each STATISTIC(...) entry pairs a counter identifier with
// the description string given as its second argument. Counters are bumped
// with plain increments elsewhere in this file (e.g. `++NumLoadsSpeculated`).
104STATISTIC(NumAllocasAnalyzed,
"Number of allocas analyzed for replacement");
105STATISTIC(NumAllocaPartitions,
"Number of alloca partitions formed");
106STATISTIC(MaxPartitionsPerAlloca,
"Maximum number of partitions per alloca");
107STATISTIC(NumAllocaPartitionUses,
"Number of alloca partition uses rewritten");
108STATISTIC(MaxUsesPerAllocaPartition,
"Maximum number of uses of a partition");
109STATISTIC(NumNewAllocas,
"Number of new, smaller allocas introduced");
110STATISTIC(NumPromoted,
"Number of allocas promoted to SSA values");
111STATISTIC(NumLoadsSpeculated,
"Number of loads speculated to allow promotion");
113 "Number of loads rewritten into predicated loads to allow promotion");
116 "Number of stores rewritten into predicated loads to allow promotion");
// Counter for aggregates that were vectorized (see the description string).
118STATISTIC(NumVectorized,
"Number of vectorized aggregates");
129class AllocaSliceRewriter;
133class SelectHandSpeculativity {
134 unsigned char Storage = 0;
138 SelectHandSpeculativity() =
default;
139 SelectHandSpeculativity &setAsSpeculatable(
bool isTrueVal);
140 bool isSpeculatable(
bool isTrueVal)
const;
141 bool areAllSpeculatable()
const;
142 bool areAnySpeculatable()
const;
143 bool areNoneSpeculatable()
const;
/// Explicit conversion that exposes the raw `Storage` byte widened to
/// `intptr_t`. Being `explicit`, it only fires on a direct cast.
145 explicit operator intptr_t()
const {
return static_cast<intptr_t
>(Storage); }
/// Rebuilds an instance from an `intptr_t`, the inverse of the conversion
/// operator. The value is stored into the `unsigned char` member `Storage`,
/// so only the low byte's worth of state is kept.
146 explicit SelectHandSpeculativity(intptr_t Storage_) : Storage(Storage_) {}
// Compile-time guarantee that SelectHandSpeculativity is exactly as large as
// its single `unsigned char` member (no padding or additional state).
148static_assert(
sizeof(SelectHandSpeculativity) ==
sizeof(
unsigned char));
150using PossiblySpeculatableLoad =
/// A single memory operation record: holds either a PossiblySpeculatableLoad
/// or an UnspeculatableStore. Code later in this file tells the two apart
/// with `std::get_if<UnspeculatableStore>` / `std::get<PossiblySpeculatableLoad>`.
153using RewriteableMemOp =
154 std::variant<PossiblySpeculatableLoad, UnspeculatableStore>;
176 LLVMContext *
const C;
177 DomTreeUpdater *
const DTU;
178 AssumptionCache *
const AC;
179 const bool PreserveCFG;
180 const bool AggregateToVector;
189 SmallSetVector<AllocaInst *, 16> Worklist;
204 SmallSetVector<AllocaInst *, 16> PostPromotionWorklist;
207 SetVector<AllocaInst *, SmallVector<AllocaInst *>,
208 SmallPtrSet<AllocaInst *, 16>, 16>
216 SmallSetVector<PHINode *, 8> SpeculatablePHIs;
220 SmallMapVector<SelectInst *, RewriteableMemOps, 8> SelectsToRewrite;
238 static std::optional<RewriteableMemOps>
239 isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG);
242 SROA(LLVMContext *C, DomTreeUpdater *DTU, AssumptionCache *AC,
244 : C(C), DTU(DTU), AC(AC),
245 PreserveCFG(
Options.
CFG == SROAOptions::PreserveCFG),
246 AggregateToVector(
Options.AggregateToVector) {}
249 std::pair<
bool ,
bool > runSROA(Function &
F);
252 friend class AllocaSliceRewriter;
254 bool presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS);
255 std::pair<AllocaInst *, uint64_t>
256 rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &
P);
257 bool splitAlloca(AllocaInst &AI, AllocaSlices &AS);
258 bool propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS);
259 std::pair<
bool ,
bool > runOnAlloca(AllocaInst &AI);
260 void clobberUse(Use &U);
261 bool deleteDeadInstructions(SmallPtrSetImpl<AllocaInst *> &DeletedAllocas);
262 bool promoteAllocas();
/// Result codes for the debug-info fragment calculation; the caller in this
/// file checks `Result == UseFrag` before adjusting the new fragment.
/// NOTE(review): the precise meaning of UseNoFrag and Skip is not visible in
/// this excerpt -- confirm against the function that returns them.
276enum FragCalcResult { UseFrag, UseNoFrag,
Skip };
280 uint64_t NewStorageSliceOffsetInBits,
282 std::optional<DIExpression::FragmentInfo> StorageFragment,
283 std::optional<DIExpression::FragmentInfo> CurrentFragment,
287 if (StorageFragment) {
289 std::min(NewStorageSliceSizeInBits, StorageFragment->SizeInBits);
291 NewStorageSliceOffsetInBits + StorageFragment->OffsetInBits;
293 Target.SizeInBits = NewStorageSliceSizeInBits;
294 Target.OffsetInBits = NewStorageSliceOffsetInBits;
300 if (!CurrentFragment) {
301 if (
auto Size = Variable->getSizeInBits()) {
304 if (
Target == CurrentFragment)
311 if (!CurrentFragment || *CurrentFragment ==
Target)
317 if (
Target.startInBits() < CurrentFragment->startInBits() ||
318 Target.endInBits() > CurrentFragment->endInBits())
357 if (DVRAssignMarkerRange.empty())
363 LLVM_DEBUG(
dbgs() <<
" OldAllocaOffsetInBits: " << OldAllocaOffsetInBits
365 LLVM_DEBUG(
dbgs() <<
" SliceSizeInBits: " << SliceSizeInBits <<
"\n");
377 DVR->getExpression()->getFragmentInfo();
390 auto *Expr = DbgAssign->getExpression();
391 bool SetKillLocation =
false;
394 std::optional<DIExpression::FragmentInfo> BaseFragment;
397 if (R == BaseFragments.
end())
399 BaseFragment = R->second;
401 std::optional<DIExpression::FragmentInfo> CurrentFragment =
402 Expr->getFragmentInfo();
405 DbgAssign->getVariable(), OldAllocaOffsetInBits, SliceSizeInBits,
406 BaseFragment, CurrentFragment, NewFragment);
410 if (Result == UseFrag && !(NewFragment == CurrentFragment)) {
411 if (CurrentFragment) {
416 NewFragment.
OffsetInBits -= CurrentFragment->OffsetInBits;
429 SetKillLocation =
true;
437 Inst->
setMetadata(LLVMContext::MD_DIAssignID, NewID);
446 DbgAssign->getDebugLoc())));
449 NewAssign = DbgAssign;
468 Value && (DbgAssign->hasArgList() ||
469 !DbgAssign->getExpression()->isSingleLocationExpression());
486 if (NewAssign != DbgAssign) {
487 NewAssign->
moveBefore(DbgAssign->getIterator());
490 LLVM_DEBUG(
dbgs() <<
"Created new assign: " << *NewAssign <<
"\n");
493 for_each(DVRAssignMarkerRange, MigrateDbgAssign);
503 Twine getNameWithPrefix(
const Twine &Name)
const {
/// Replaces the stored `Prefix` with the string produced by `P.str()`.
508 void SetNamePrefix(
const Twine &
P) { Prefix =
P.str(); }
510 void InsertHelper(Instruction *
I,
const Twine &Name,
528 uint64_t BeginOffset = 0;
531 uint64_t EndOffset = 0;
535 PointerIntPair<Use *, 1, bool> UseAndIsSplittable;
/// Builds a slice from its begin/end offsets, the `Use` it describes and
/// whether it is splittable. The use pointer and the splittable flag are
/// packed together into `UseAndIsSplittable`.
540 Slice(uint64_t BeginOffset, uint64_t EndOffset, Use *U,
bool IsSplittable)
541 : BeginOffset(BeginOffset), EndOffset(EndOffset),
542 UseAndIsSplittable(
U, IsSplittable) {}
/// Returns the stored begin offset of this slice.
544 uint64_t beginOffset()
const {
return BeginOffset; }
/// Returns the stored end offset of this slice.
545 uint64_t endOffset()
const {
return EndOffset; }
/// Returns the boolean flag packed into `UseAndIsSplittable`.
547 bool isSplittable()
const {
return UseAndIsSplittable.getInt(); }
/// Clears the splittable flag; `isSplittable()` returns false afterwards.
/// The use pointer stored alongside the flag is left untouched.
548 void makeUnsplittable() { UseAndIsSplittable.setInt(
false); }
/// Returns the `Use` pointer packed into `UseAndIsSplittable`; it is null
/// once the slice has been killed.
550 Use *getUse()
const {
return UseAndIsSplittable.getPointer(); }
/// A slice is dead when it no longer refers to a `Use` (null use pointer).
552 bool isDead()
const {
return getUse() ==
nullptr; }
/// Marks the slice dead by nulling its use pointer. Offsets and the
/// splittable flag are not modified. Dead slices are filtered out later via
/// `isDead()` (see the `erase_if` in the AllocaSlices constructor).
553 void kill() { UseAndIsSplittable.setPointer(
nullptr); }
562 if (beginOffset() <
RHS.beginOffset())
564 if (beginOffset() >
RHS.beginOffset())
566 if (isSplittable() !=
RHS.isSplittable())
567 return !isSplittable();
568 if (endOffset() >
RHS.endOffset())
574 [[maybe_unused]]
friend bool operator<(
const Slice &
LHS, uint64_t RHSOffset) {
575 return LHS.beginOffset() < RHSOffset;
577 [[maybe_unused]]
friend bool operator<(uint64_t LHSOffset,
const Slice &
RHS) {
578 return LHSOffset <
RHS.beginOffset();
582 return isSplittable() ==
RHS.isSplittable() &&
583 beginOffset() ==
RHS.beginOffset() && endOffset() ==
RHS.endOffset();
598 AllocaSlices(
const DataLayout &
DL, AllocaInst &AI);
/// True when `PointerEscapingInstr` is non-null. The constructor sets that
/// pointer to the escaping instruction or, failing that, the aborting one.
604 bool isEscaped()
const {
return PointerEscapingInstr; }
/// True when `PointerEscapingInstrReadOnly` is non-null, i.e. the pointer
/// analysis recorded an escaped-read-only instruction.
605 bool isEscapedReadOnly()
const {
return PointerEscapingInstrReadOnly; }
610 using range = iterator_range<iterator>;
/// Mutable iterator to the first slice in `Slices`.
612 iterator
begin() {
return Slices.begin(); }
/// Mutable past-the-end iterator of `Slices`.
613 iterator
end() {
return Slices.end(); }
616 using const_range = iterator_range<const_iterator>;
/// Read-only iterator to the first slice in `Slices`.
618 const_iterator
begin()
const {
return Slices.begin(); }
/// Read-only past-the-end iterator of `Slices`.
619 const_iterator
end()
const {
return Slices.end(); }
/// Removes the slices in the half-open range [Start, Stop) by forwarding to
/// `Slices.erase`. Iterators into the removed or later part of the container
/// should be treated as invalid afterwards.
623 void erase(iterator Start, iterator Stop) { Slices.erase(Start, Stop); }
631 int OldSize = Slices.size();
632 Slices.append(NewSlices.
begin(), NewSlices.
end());
633 auto SliceI = Slices.begin() + OldSize;
634 std::stable_sort(SliceI, Slices.end());
635 std::inplace_merge(Slices.begin(), SliceI, Slices.end());
648 return DeadUseIfPromotable;
659#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
660 void print(raw_ostream &OS, const_iterator
I, StringRef Indent =
" ")
const;
661 void printSlice(raw_ostream &OS, const_iterator
I,
662 StringRef Indent =
" ")
const;
663 void printUse(raw_ostream &OS, const_iterator
I,
664 StringRef Indent =
" ")
const;
665 void print(raw_ostream &OS)
const;
666 void dump(const_iterator
I)
const;
671 template <
typename DerivedT,
typename RetT =
void>
class BuilderBase;
674 friend class AllocaSlices::SliceBuilder;
676#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
704 SmallVector<Instruction *, 8> DeadUsers;
731 friend class AllocaSlices;
732 friend class AllocaSlices::partition_iterator;
734 using iterator = AllocaSlices::iterator;
738 uint64_t BeginOffset = 0, EndOffset = 0;
/// Starts a partition positioned at `SI` with an empty slice range: both
/// `SI` and `SJ` are set to the same iterator, so `empty()` is true until
/// `SJ` is advanced.
748 Partition(iterator SI) : SI(SI), SJ(SI) {}
/// Returns the partition's stored begin offset.
754 uint64_t beginOffset()
const {
return BeginOffset; }
/// Returns the partition's stored end offset.
759 uint64_t endOffset()
const {
return EndOffset; }
764 uint64_t
size()
const {
765 assert(BeginOffset < EndOffset &&
"Partitions must span some bytes!");
766 return EndOffset - BeginOffset;
/// True when the slice range [SI, SJ) contains no slices. This says nothing
/// about the byte range covered by the offsets.
771 bool empty()
const {
return SI == SJ; }
/// First slice of the partition's range; with `end()` this lets callers
/// write `for (const Slice &S : P)`.
782 iterator
begin()
const {
return SI; }
/// Past-the-end iterator of the partition's slice range.
783 iterator
end()
const {
return SJ; }
815 AllocaSlices::iterator SE;
819 uint64_t MaxSplitSliceEndOffset = 0;
823 partition_iterator(AllocaSlices::iterator
SI, AllocaSlices::iterator SE)
835 assert((
P.SI != SE || !
P.SplitTails.empty()) &&
836 "Cannot advance past the end of the slices!");
839 if (!
P.SplitTails.empty()) {
840 if (
P.EndOffset >= MaxSplitSliceEndOffset) {
842 P.SplitTails.clear();
843 MaxSplitSliceEndOffset = 0;
849 [&](Slice *S) { return S->endOffset() <= P.EndOffset; });
852 return S->endOffset() == MaxSplitSliceEndOffset;
854 "Could not find the current max split slice offset!");
857 return S->endOffset() <= MaxSplitSliceEndOffset;
859 "Max split slice end offset is not actually the max!");
866 assert(P.SplitTails.empty() &&
"Failed to clear the split slices!");
876 if (S.isSplittable() && S.endOffset() > P.EndOffset) {
877 P.SplitTails.push_back(&S);
878 MaxSplitSliceEndOffset =
879 std::max(S.endOffset(), MaxSplitSliceEndOffset);
887 P.BeginOffset = P.EndOffset;
888 P.EndOffset = MaxSplitSliceEndOffset;
895 if (!P.SplitTails.empty() && P.SI->beginOffset() != P.EndOffset &&
896 !P.SI->isSplittable()) {
897 P.BeginOffset = P.EndOffset;
898 P.EndOffset = P.SI->beginOffset();
908 P.BeginOffset = P.SplitTails.empty() ? P.SI->beginOffset() : P.EndOffset;
909 P.EndOffset = P.SI->endOffset();
914 if (!P.SI->isSplittable()) {
917 assert(P.BeginOffset == P.SI->beginOffset());
921 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
922 if (!P.SJ->isSplittable())
923 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
935 assert(P.SI->isSplittable() &&
"Forming a splittable partition!");
938 while (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset &&
939 P.SJ->isSplittable()) {
940 P.EndOffset = std::max(P.EndOffset, P.SJ->endOffset());
947 if (P.SJ != SE && P.SJ->beginOffset() < P.EndOffset) {
948 assert(!P.SJ->isSplittable());
949 P.EndOffset = P.SJ->beginOffset();
956 "End iterators don't match between compared partition iterators!");
963 if (P.SI == RHS.P.SI && P.SplitTails.empty() == RHS.P.SplitTails.empty()) {
964 assert(P.SJ == RHS.P.SJ &&
965 "Same set of slices formed two different sized partitions!");
966 assert(P.SplitTails.size() == RHS.P.SplitTails.size() &&
967 "Same slice position with differently sized non-empty split "
990 return make_range(partition_iterator(begin(), end()),
991 partition_iterator(end(), end()));
999 return SI.getOperand(1 + CI->isZero());
1000 if (
SI.getOperand(1) ==
SI.getOperand(2))
1001 return SI.getOperand(1);
1010 return PN->hasConstantValue();
1041 if (VisitedDeadInsts.
insert(&
I).second)
1046 bool IsSplittable =
false) {
1052 <<
" which has zero size or starts outside of the "
1053 << AllocSize <<
" byte alloca:\n"
1054 <<
" alloca: " << AS.AI <<
"\n"
1055 <<
" use: " <<
I <<
"\n");
1056 return markAsDead(
I);
1059 uint64_t BeginOffset =
Offset.getZExtValue();
1060 uint64_t EndOffset = BeginOffset +
Size;
1068 assert(AllocSize >= BeginOffset);
1069 if (
Size > AllocSize - BeginOffset) {
1071 <<
Offset <<
" to remain within the " << AllocSize
1072 <<
" byte alloca:\n"
1073 <<
" alloca: " << AS.AI <<
"\n"
1074 <<
" use: " <<
I <<
"\n");
1075 EndOffset = AllocSize;
1078 AS.Slices.push_back(Slice(BeginOffset, EndOffset, U, IsSplittable));
1081 void visitBitCastInst(BitCastInst &BC) {
1083 return markAsDead(BC);
1085 return Base::visitBitCastInst(BC);
1088 void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
1090 return markAsDead(ASC);
1092 return Base::visitAddrSpaceCastInst(ASC);
1095 void visitGetElementPtrInst(GetElementPtrInst &GEPI) {
1097 return markAsDead(GEPI);
1099 return Base::visitGetElementPtrInst(GEPI);
1102 void handleLoadOrStore(
Type *Ty, Instruction &
I,
const APInt &
Offset,
1103 uint64_t
Size,
bool IsVolatile) {
1113 void visitLoadInst(LoadInst &LI) {
1115 "All simple FCA loads should have been pre-split");
1120 return PI.setEscapedReadOnly(&LI);
1123 if (
Size.isScalable()) {
1126 return PI.setAborted(&LI);
1135 void visitStoreInst(StoreInst &SI) {
1136 Value *ValOp =
SI.getValueOperand();
1138 return PI.setEscapedAndAborted(&SI);
1140 return PI.setAborted(&SI);
1142 TypeSize StoreSize =
DL.getTypeStoreSize(ValOp->
getType());
1144 unsigned VScale =
SI.getFunction()->getVScaleValue();
1146 return PI.setAborted(&SI);
1162 <<
Offset <<
" which extends past the end of the "
1163 << AllocSize <<
" byte alloca:\n"
1164 <<
" alloca: " << AS.AI <<
"\n"
1165 <<
" use: " << SI <<
"\n");
1166 return markAsDead(SI);
1170 "All simple FCA stores should have been pre-split");
1174 void visitMemSetInst(MemSetInst &
II) {
1175 assert(
II.getRawDest() == *U &&
"Pointer use is not the destination?");
1178 (IsOffsetKnown &&
Offset.uge(AllocSize)))
1180 return markAsDead(
II);
1183 return PI.setAborted(&
II);
1187 : AllocSize -
Offset.getLimitedValue(),
1191 void visitMemTransferInst(MemTransferInst &
II) {
1195 return markAsDead(
II);
1199 if (VisitedDeadInsts.
count(&
II))
1203 return PI.setAborted(&
II);
1210 if (
Offset.uge(AllocSize)) {
1211 auto MTPI = MemTransferSliceMap.
find(&
II);
1212 if (MTPI != MemTransferSliceMap.
end())
1213 AS.Slices[MTPI->second].kill();
1214 return markAsDead(
II);
1217 uint64_t RawOffset =
Offset.getLimitedValue();
1218 uint64_t
Size =
Length ?
Length->getLimitedValue() : AllocSize - RawOffset;
1222 if (*U ==
II.getRawDest() && *U ==
II.getRawSource()) {
1224 if (!
II.isVolatile())
1225 return markAsDead(
II);
1233 SmallDenseMap<Instruction *, unsigned>::iterator MTPI;
1234 std::tie(MTPI, Inserted) =
1235 MemTransferSliceMap.
insert(std::make_pair(&
II, AS.Slices.size()));
1236 unsigned PrevIdx = MTPI->second;
1238 Slice &PrevP = AS.Slices[PrevIdx];
1242 if (!
II.isVolatile() && PrevP.beginOffset() == RawOffset) {
1244 return markAsDead(
II);
1249 PrevP.makeUnsplittable();
1256 assert(AS.Slices[PrevIdx].getUse()->getUser() == &
II &&
1257 "Map index doesn't point back to a slice with this user.");
1263 void visitIntrinsicInst(IntrinsicInst &
II) {
1264 if (
II.isDroppable()) {
1265 AS.DeadUseIfPromotable.push_back(U);
1270 return PI.setAborted(&
II);
1272 if (
II.isLifetimeStartOrEnd()) {
1273 insertUse(
II,
Offset, AllocSize,
true);
1277 Base::visitIntrinsicInst(
II);
1280 Instruction *hasUnsafePHIOrSelectUse(Instruction *Root, uint64_t &
Size) {
1285 SmallPtrSet<Instruction *, 4> Visited;
1295 std::tie(UsedI,
I) =
Uses.pop_back_val();
1298 TypeSize LoadSize =
DL.getTypeStoreSize(LI->
getType());
1310 TypeSize StoreSize =
DL.getTypeStoreSize(
Op->getType());
1320 if (!
GEP->hasAllZeroIndices())
1327 for (User *U :
I->users())
1330 }
while (!
Uses.empty());
1335 void visitPHINodeOrSelectInst(Instruction &
I) {
1338 return markAsDead(
I);
1344 return PI.setAborted(&
I);
1362 AS.DeadOperands.push_back(U);
1368 return PI.setAborted(&
I);
1371 uint64_t &
Size = PHIOrSelectSizes[&
I];
1374 if (Instruction *UnsafeI = hasUnsafePHIOrSelectUse(&
I,
Size))
1375 return PI.setAborted(UnsafeI);
1384 if (
Offset.uge(AllocSize)) {
1385 AS.DeadOperands.push_back(U);
/// PHI nodes are handled by the shared PHI/select routine.
1392 void visitPHINode(PHINode &PN) { visitPHINodeOrSelectInst(PN); }
/// Selects are handled by the same shared routine as PHI nodes.
1394 void visitSelectInst(SelectInst &SI) { visitPHINodeOrSelectInst(SI); }
/// Records `I` as the aborting instruction via `PI.setAborted`.
/// NOTE(review): presumably this is the catch-all reached for instruction
/// kinds without a dedicated visit method -- confirm against the visitor base.
1397 void visitInstruction(Instruction &
I) { PI.setAborted(&
I); }
1399 void visitCallBase(CallBase &CB) {
1405 PI.setEscapedReadOnly(&CB);
1409 Base::visitCallBase(CB);
1413AllocaSlices::AllocaSlices(
const DataLayout &
DL, AllocaInst &AI)
1415#
if !defined(
NDEBUG) || defined(LLVM_ENABLE_DUMP)
1418 PointerEscapingInstr(nullptr), PointerEscapingInstrReadOnly(nullptr) {
1420 SliceBuilder::PtrInfo PtrI =
PB.visitPtr(AI);
1421 if (PtrI.isEscaped() || PtrI.isAborted()) {
1424 PointerEscapingInstr = PtrI.getEscapingInst() ? PtrI.getEscapingInst()
1425 : PtrI.getAbortingInst();
1426 assert(PointerEscapingInstr &&
"Did not track a bad instruction");
1429 PointerEscapingInstrReadOnly = PtrI.getEscapedReadOnlyInst();
1431 llvm::erase_if(Slices, [](
const Slice &S) {
return S.isDead(); });
1438#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1440void AllocaSlices::print(raw_ostream &OS, const_iterator
I,
1441 StringRef Indent)
const {
1442 printSlice(OS,
I, Indent);
1444 printUse(OS,
I, Indent);
1447void AllocaSlices::printSlice(raw_ostream &OS, const_iterator
I,
1448 StringRef Indent)
const {
1449 OS << Indent <<
"[" <<
I->beginOffset() <<
"," <<
I->endOffset() <<
")"
1450 <<
" slice #" << (
I -
begin())
1451 << (
I->isSplittable() ?
" (splittable)" :
"");
1454void AllocaSlices::printUse(raw_ostream &OS, const_iterator
I,
1455 StringRef Indent)
const {
1456 OS << Indent <<
" used by: " << *
I->getUse()->getUser() <<
"\n";
1459void AllocaSlices::print(raw_ostream &OS)
const {
1460 if (PointerEscapingInstr) {
1461 OS <<
"Can't analyze slices for alloca: " << AI <<
"\n"
1462 <<
" A pointer to this alloca escaped by:\n"
1463 <<
" " << *PointerEscapingInstr <<
"\n";
1467 if (PointerEscapingInstrReadOnly)
1468 OS <<
"Escapes into ReadOnly: " << *PointerEscapingInstrReadOnly <<
"\n";
1470 OS <<
"Slices of alloca: " << AI <<
"\n";
1484static std::pair<Type *, IntegerType *>
1488 bool TyIsCommon =
true;
1493 for (AllocaSlices::const_iterator
I =
B;
I !=
E; ++
I) {
1494 Use *U =
I->getUse();
1497 if (
I->beginOffset() !=
B->beginOffset() ||
I->endOffset() != EndOffset)
1500 Type *UserTy =
nullptr;
1504 UserTy =
SI->getValueOperand()->getType();
1512 if (UserITy->getBitWidth() % 8 != 0 ||
1513 UserITy->getBitWidth() / 8 > (EndOffset -
B->beginOffset()))
1518 if (!ITy || ITy->
getBitWidth() < UserITy->getBitWidth())
1524 if (!UserTy || (Ty && Ty != UserTy))
1530 return {TyIsCommon ? Ty :
nullptr, ITy};
1561 Type *LoadType =
nullptr;
1574 if (LoadType != LI->
getType())
1583 if (BBI->mayWriteToMemory())
1586 MaxAlign = std::max(MaxAlign, LI->
getAlign());
1593 APInt(APWidth,
DL.getTypeStoreSize(LoadType).getFixedValue());
1630 IRB.SetInsertPoint(&PN);
1632 PN.
getName() +
".sroa.speculated");
1662 IRB.SetInsertPoint(TI);
1664 LoadInst *Load = IRB.CreateAlignedLoad(
1665 LoadTy, InVal, Alignment,
1666 (PN.
getName() +
".sroa.speculate.load." + Pred->getName()));
1667 ++NumLoadsSpeculated;
1669 Load->setAAMetadata(AATags);
1671 InjectedLoads[Pred] = Load;
1678SelectHandSpeculativity &
1679SelectHandSpeculativity::setAsSpeculatable(
bool isTrueVal) {
1687bool SelectHandSpeculativity::isSpeculatable(
bool isTrueVal)
const {
1692bool SelectHandSpeculativity::areAllSpeculatable()
const {
1693 return isSpeculatable(
true) &&
1694 isSpeculatable(
false);
1697bool SelectHandSpeculativity::areAnySpeculatable()
const {
1698 return isSpeculatable(
true) ||
1699 isSpeculatable(
false);
1701bool SelectHandSpeculativity::areNoneSpeculatable()
const {
1702 return !areAnySpeculatable();
1705static SelectHandSpeculativity
1708 SelectHandSpeculativity
Spec;
1714 Spec.setAsSpeculatable(
Value ==
SI.getTrueValue());
1715 else if (PreserveCFG)
1721std::optional<RewriteableMemOps>
1722SROA::isSafeSelectToSpeculate(SelectInst &SI,
bool PreserveCFG) {
1723 RewriteableMemOps
Ops;
1725 for (User *U :
SI.users()) {
1733 if (
Store->isVolatile() || PreserveCFG)
1735 Ops.emplace_back(Store);
1746 PossiblySpeculatableLoad
Load(LI);
1752 Ops.emplace_back(Load);
1756 SelectHandSpeculativity Spec =
1758 if (PreserveCFG && !Spec.areAllSpeculatable())
1762 Ops.emplace_back(Load);
1772 Value *TV =
SI.getTrueValue();
1773 Value *FV =
SI.getFalseValue();
1778 IRB.SetInsertPoint(&LI);
1782 LI.
getName() +
".sroa.speculate.load.true");
1785 LI.
getName() +
".sroa.speculate.load.false");
1786 NumLoadsSpeculated += 2;
1798 Value *V = IRB.CreateSelect(
SI.getCondition(), TL, FL,
1799 LI.
getName() +
".sroa.speculated",
1806template <
typename T>
1808 SelectHandSpeculativity
Spec,
1815 if (
Spec.areNoneSpeculatable())
1817 SI.getMetadata(LLVMContext::MD_prof), &DTU);
1820 SI.getMetadata(LLVMContext::MD_prof), &DTU,
1822 if (
Spec.isSpeculatable(
true))
1833 bool IsThen = SuccBB == HeadBI->getSuccessor(0);
1834 int SuccIdx = IsThen ? 0 : 1;
1835 auto *NewMemOpBB = SuccBB ==
Tail ? Head : SuccBB;
1836 auto &CondMemOp =
cast<T>(*
I.clone());
1837 if (NewMemOpBB != Head) {
1838 NewMemOpBB->setName(Head->
getName() + (IsThen ?
".then" :
".else"));
1840 ++NumLoadsPredicated;
1842 ++NumStoresPredicated;
1844 CondMemOp.dropUBImplyingAttrsAndMetadata();
1845 ++NumLoadsSpeculated;
1847 CondMemOp.insertBefore(NewMemOpBB->getTerminator()->getIterator());
1848 Value *Ptr =
SI.getOperand(1 + SuccIdx);
1849 CondMemOp.setOperand(
I.getPointerOperandIndex(), Ptr);
1851 CondMemOp.setName(
I.getName() + (IsThen ?
".then" :
".else") +
".val");
1859 I.replaceAllUsesWith(PN);
1864 SelectHandSpeculativity
Spec,
1875 const RewriteableMemOps &
Ops,
1877 bool CFGChanged =
false;
1880 for (
const RewriteableMemOp &
Op :
Ops) {
1881 SelectHandSpeculativity
Spec;
1883 if (
auto *
const *US = std::get_if<UnspeculatableStore>(&
Op)) {
1886 auto PSL = std::get<PossiblySpeculatableLoad>(
Op);
1887 I = PSL.getPointer();
1888 Spec = PSL.getInt();
1890 if (
Spec.areAllSpeculatable()) {
1893 assert(DTU &&
"Should not get here when not allowed to modify the CFG!");
1897 I->eraseFromParent();
1902 SI.eraseFromParent();
1910 const Twine &NamePrefix) {
1912 Ptr = IRB.CreateInBoundsPtrAdd(Ptr, IRB.getInt(
Offset),
1913 NamePrefix +
"sroa_idx");
1914 return IRB.CreatePointerBitCastOrAddrSpaceCast(Ptr,
PointerTy,
1915 NamePrefix +
"sroa_cast");
1930 unsigned VScale = 0) {
1940 "We can't have the same bitwidth for different int types");
1944 TypeSize NewSize =
DL.getTypeSizeInBits(NewTy);
1945 TypeSize OldSize =
DL.getTypeSizeInBits(OldTy);
1972 if (NewSize != OldSize)
1988 return OldAS == NewAS ||
1989 (!
DL.isNonIntegralAddressSpace(OldAS) &&
1990 !
DL.isNonIntegralAddressSpace(NewAS) &&
1991 DL.getPointerSize(OldAS) ==
DL.getPointerSize(NewAS));
1997 return !
DL.isNonIntegralPointerType(NewTy);
2001 if (!
DL.isNonIntegralPointerType(OldTy))
2024 std::max(S.beginOffset(),
P.beginOffset()) -
P.beginOffset();
2025 uint64_t BeginIndex = BeginOffset / ElementSize;
2026 if (BeginIndex * ElementSize != BeginOffset ||
2029 uint64_t EndOffset = std::min(S.endOffset(),
P.endOffset()) -
P.beginOffset();
2030 uint64_t EndIndex = EndOffset / ElementSize;
2031 if (EndIndex * ElementSize != EndOffset ||
2035 assert(EndIndex > BeginIndex &&
"Empty vector!");
2036 uint64_t NumElements = EndIndex - BeginIndex;
2037 Type *SliceTy = (NumElements == 1)
2038 ? Ty->getElementType()
2044 Use *U = S.getUse();
2047 if (
MI->isVolatile())
2049 if (!S.isSplittable())
2052 if (!
II->isLifetimeStartOrEnd() && !
II->isDroppable())
2059 if (LTy->isStructTy())
2061 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2062 assert(LTy->isIntegerTy());
2068 if (
SI->isVolatile())
2070 Type *STy =
SI->getValueOperand()->getType();
2074 if (
P.beginOffset() > S.beginOffset() ||
P.endOffset() < S.endOffset()) {
2094 bool HaveCommonEltTy,
Type *CommonEltTy,
2095 bool HaveVecPtrTy,
bool HaveCommonVecPtrTy,
2096 VectorType *CommonVecPtrTy,
unsigned VScale) {
2098 if (CandidateTys.
empty())
2105 if (HaveVecPtrTy && !HaveCommonVecPtrTy)
2109 if (!HaveCommonEltTy && HaveVecPtrTy) {
2111 CandidateTys.
clear();
2113 }
else if (!HaveCommonEltTy && !HaveVecPtrTy) {
2116 if (!VTy->getElementType()->isIntegerTy())
2118 VTy->getContext(), VTy->getScalarSizeInBits())));
2125 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2126 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2127 "Cannot have vector types of different sizes!");
2128 assert(RHSTy->getElementType()->isIntegerTy() &&
2129 "All non-integer types eliminated!");
2130 assert(LHSTy->getElementType()->isIntegerTy() &&
2131 "All non-integer types eliminated!");
2137 assert(
DL.getTypeSizeInBits(RHSTy).getFixedValue() ==
2138 DL.getTypeSizeInBits(LHSTy).getFixedValue() &&
2139 "Cannot have vector types of different sizes!");
2140 assert(RHSTy->getElementType()->isIntegerTy() &&
2141 "All non-integer types eliminated!");
2142 assert(LHSTy->getElementType()->isIntegerTy() &&
2143 "All non-integer types eliminated!");
2147 llvm::sort(CandidateTys, RankVectorTypesComp);
2148 CandidateTys.erase(
llvm::unique(CandidateTys, RankVectorTypesEq),
2149 CandidateTys.end());
2155 assert(VTy->getElementType() == CommonEltTy &&
2156 "Unaccounted for element type!");
2157 assert(VTy == CandidateTys[0] &&
2158 "Different vector types with the same element type!");
2161 CandidateTys.resize(1);
2168 std::numeric_limits<unsigned short>::max();
2174 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2178 if (ElementSize % 8)
2180 assert((
DL.getTypeSizeInBits(VTy).getFixedValue() % 8) == 0 &&
2181 "vector size not a multiple of element size?");
2184 for (
const Slice &S :
P)
2188 for (
const Slice *S :
P.splitSliceTails())
2194 return VTy != CandidateTys.
end() ? *VTy :
nullptr;
2201 bool &HaveCommonEltTy,
Type *&CommonEltTy,
bool &HaveVecPtrTy,
2202 bool &HaveCommonVecPtrTy,
VectorType *&CommonVecPtrTy,
unsigned VScale) {
2204 CandidateTysCopy.
size() ? CandidateTysCopy[0] :
nullptr;
2207 for (
Type *Ty : OtherTys) {
2210 unsigned TypeSize =
DL.getTypeSizeInBits(Ty).getFixedValue();
2213 for (
VectorType *
const VTy : CandidateTysCopy) {
2215 assert(CandidateTysCopy[0] == OriginalElt &&
"Different Element");
2216 unsigned VectorSize =
DL.getTypeSizeInBits(VTy).getFixedValue();
2217 unsigned ElementSize =
2218 DL.getTypeSizeInBits(VTy->getElementType()).getFixedValue();
2222 CheckCandidateType(NewVTy);
2228 P,
DL, CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2229 HaveCommonVecPtrTy, CommonVecPtrTy, VScale);
2248 Type *CommonEltTy =
nullptr;
2250 bool HaveVecPtrTy =
false;
2251 bool HaveCommonEltTy =
true;
2252 bool HaveCommonVecPtrTy =
true;
2253 auto CheckCandidateType = [&](
Type *Ty) {
2256 if (!CandidateTys.
empty()) {
2258 if (
DL.getTypeSizeInBits(VTy).getFixedValue() !=
2259 DL.getTypeSizeInBits(V).getFixedValue()) {
2260 CandidateTys.
clear();
2265 Type *EltTy = VTy->getElementType();
2268 CommonEltTy = EltTy;
2269 else if (CommonEltTy != EltTy)
2270 HaveCommonEltTy =
false;
2273 HaveVecPtrTy =
true;
2274 if (!CommonVecPtrTy)
2275 CommonVecPtrTy = VTy;
2276 else if (CommonVecPtrTy != VTy)
2277 HaveCommonVecPtrTy =
false;
2283 for (
const Slice &S :
P) {
2288 Ty =
SI->getValueOperand()->getType();
2292 auto CandTy = Ty->getScalarType();
2293 if (CandTy->isPointerTy() && (S.beginOffset() !=
P.beginOffset() ||
2294 S.endOffset() !=
P.endOffset())) {
2301 if (S.beginOffset() ==
P.beginOffset() && S.endOffset() ==
P.endOffset())
2302 CheckCandidateType(Ty);
2307 LoadStoreTys, CandidateTysCopy, CheckCandidateType,
P,
DL,
2308 CandidateTys, HaveCommonEltTy, CommonEltTy, HaveVecPtrTy,
2309 HaveCommonVecPtrTy, CommonVecPtrTy, VScale))
2312 CandidateTys.
clear();
2314 DeferredTys, CandidateTysCopy, CheckCandidateType,
P,
DL, CandidateTys,
2315 HaveCommonEltTy, CommonEltTy, HaveVecPtrTy, HaveCommonVecPtrTy,
2316 CommonVecPtrTy, VScale);
2327 bool &WholeAllocaOp) {
2330 uint64_t RelBegin = S.beginOffset() - AllocBeginOffset;
2331 uint64_t RelEnd = S.endOffset() - AllocBeginOffset;
2333 Use *U = S.getUse();
2340 if (
II->isLifetimeStartOrEnd() ||
II->isDroppable())
2358 if (S.beginOffset() < AllocBeginOffset)
2364 WholeAllocaOp =
true;
2366 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2368 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2375 Type *ValueTy =
SI->getValueOperand()->getType();
2376 if (
SI->isVolatile())
2379 TypeSize StoreSize =
DL.getTypeStoreSize(ValueTy);
2384 if (S.beginOffset() < AllocBeginOffset)
2390 WholeAllocaOp =
true;
2392 if (ITy->getBitWidth() <
DL.getTypeStoreSizeInBits(ITy).getFixedValue())
2394 }
else if (RelBegin != 0 || RelEnd !=
Size ||
2403 if (!S.isSplittable())
2420 uint64_t SizeInBits =
DL.getTypeSizeInBits(AllocaTy).getFixedValue();
2426 if (SizeInBits !=
DL.getTypeStoreSizeInBits(AllocaTy).getFixedValue())
2444 bool WholeAllocaOp =
P.empty() &&
DL.isLegalInteger(SizeInBits);
2446 for (
const Slice &S :
P)
2451 for (
const Slice *S :
P.splitSliceTails())
2456 return WholeAllocaOp;
2461 const Twine &Name) {
2465 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2466 "Element extends past full value");
2468 if (
DL.isBigEndian())
2469 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2470 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2472 V = IRB.CreateLShr(V, ShAmt, Name +
".shift");
2475 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2476 "Cannot extract to a larger integer!");
2478 V = IRB.CreateTrunc(V, Ty, Name +
".trunc");
2488 assert(Ty->getBitWidth() <= IntTy->getBitWidth() &&
2489 "Cannot insert a larger integer!");
2492 V = IRB.CreateZExt(V, IntTy, Name +
".ext");
2496 DL.getTypeStoreSize(IntTy).getFixedValue() &&
2497 "Element store outside of alloca store");
2499 if (
DL.isBigEndian())
2500 ShAmt = 8 * (
DL.getTypeStoreSize(IntTy).getFixedValue() -
2501 DL.getTypeStoreSize(Ty).getFixedValue() -
Offset);
2503 V = IRB.CreateShl(V, ShAmt, Name +
".shift");
2507 if (ShAmt || Ty->getBitWidth() < IntTy->getBitWidth()) {
2508 APInt Mask = ~Ty->getMask().zext(IntTy->getBitWidth()).shl(ShAmt);
2509 Old = IRB.CreateAnd(Old, Mask, Name +
".mask");
2511 V = IRB.CreateOr(Old, V, Name +
".insert");
2518 unsigned EndIndex,
const Twine &Name) {
2520 unsigned NumElements = EndIndex - BeginIndex;
2523 if (NumElements == VecTy->getNumElements())
2526 if (NumElements == 1) {
2527 V = IRB.CreateExtractElement(V, IRB.getInt32(BeginIndex),
2534 V = IRB.CreateShuffleVector(V, Mask, Name +
".extract");
2540 unsigned BeginIndex,
const Twine &Name) {
2542 assert(VecTy &&
"Can only insert a vector into a vector");
2547 V = IRB.CreateInsertElement(Old, V, IRB.getInt32(BeginIndex),
2556 assert(NumSubElements <= NumElements &&
"Too many elements!");
2557 if (NumSubElements == NumElements) {
2558 assert(V->getType() == VecTy &&
"Vector type mismatch");
2561 unsigned EndIndex = BeginIndex + NumSubElements;
2568 Mask.reserve(NumElements);
2569 for (
unsigned Idx = 0; Idx != NumElements; ++Idx)
2570 if (Idx >= BeginIndex && Idx < EndIndex)
2571 Mask.push_back(Idx - BeginIndex);
2574 V = IRB.CreateShuffleVector(V, Mask, Name +
".expand");
2578 for (
unsigned Idx = 0; Idx != NumElements; ++Idx)
2579 if (Idx >= BeginIndex && Idx < EndIndex)
2580 Mask.push_back(Idx);
2582 Mask.push_back(Idx + NumElements);
2583 V = IRB.CreateShuffleVector(V, Old, Mask, Name +
"blend");
2622 const char *DebugName) {
2623 Type *EltType = VecType->getElementType();
2624 if (EltType != NewAIEltTy) {
2626 unsigned TotalBits =
2627 VecType->getNumElements() *
DL.getTypeSizeInBits(EltType);
2628 unsigned NewNumElts = TotalBits /
DL.getTypeSizeInBits(NewAIEltTy);
2631 V = Builder.CreateBitCast(V, NewVecType);
2632 VecType = NewVecType;
2633 LLVM_DEBUG(
dbgs() <<
" bitcast " << DebugName <<
": " << *V <<
"\n");
2637 BitcastIfNeeded(V0, VecType0,
"V0");
2638 BitcastIfNeeded(
V1, VecType1,
"V1");
2640 unsigned NumElts0 = VecType0->getNumElements();
2641 unsigned NumElts1 = VecType1->getNumElements();
2645 if (NumElts0 == NumElts1) {
2646 for (
unsigned i = 0; i < NumElts0 + NumElts1; ++i)
2647 ShuffleMask.push_back(i);
2651 unsigned SmallSize = std::min(NumElts0, NumElts1);
2652 unsigned LargeSize = std::max(NumElts0, NumElts1);
2653 bool IsV0Smaller = NumElts0 < NumElts1;
2654 Value *&ExtendedVec = IsV0Smaller ? V0 :
V1;
2656 for (
unsigned i = 0; i < SmallSize; ++i)
2658 for (
unsigned i = SmallSize; i < LargeSize; ++i)
2660 ExtendedVec = Builder.CreateShuffleVector(
2662 LLVM_DEBUG(
dbgs() <<
" shufflevector: " << *ExtendedVec <<
"\n");
2663 for (
unsigned i = 0; i < NumElts0; ++i)
2664 ShuffleMask.push_back(i);
2665 for (
unsigned i = 0; i < NumElts1; ++i)
2666 ShuffleMask.push_back(LargeSize + i);
2669 return Builder.CreateShuffleVector(V0,
V1, ShuffleMask);
2680class AllocaSliceRewriter :
public InstVisitor<AllocaSliceRewriter, bool> {
2682 friend class InstVisitor<AllocaSliceRewriter, bool>;
2684 using Base = InstVisitor<AllocaSliceRewriter, bool>;
2686 const DataLayout &
DL;
2689 AllocaInst &OldAI, &NewAI;
2690 const uint64_t NewAllocaBeginOffset, NewAllocaEndOffset;
2710 uint64_t ElementSize;
2714 uint64_t BeginOffset = 0;
2715 uint64_t EndOffset = 0;
2719 uint64_t NewBeginOffset = 0, NewEndOffset = 0;
2721 uint64_t SliceSize = 0;
2722 bool IsSplittable =
false;
2723 bool IsSplit =
false;
2724 Use *OldUse =
nullptr;
2728 SmallSetVector<PHINode *, 8> &PHIUsers;
2729 SmallSetVector<SelectInst *, 8> &SelectUsers;
// Produces a pointer to NewAI suitable for an access in address space
// AddrSpace: the visible path builds the pointer type for that address space
// and address-space-casts &NewAI to it.
// NOTE(review): the lines between the signature and the cast are missing from
// this listing, so how IsVolatile affects the result is not visible here.
2737 Value *getPtrToNewAI(
unsigned AddrSpace,
bool IsVolatile) {
2741 Type *AccessTy = IRB.getPtrTy(AddrSpace);
2742 return IRB.CreateAddrSpaceCast(&NewAI, AccessTy);
2746 AllocaSliceRewriter(
const DataLayout &
DL, AllocaSlices &AS, SROA &
Pass,
2747 AllocaInst &OldAI, AllocaInst &NewAI,
Type *NewAllocaTy,
2748 uint64_t NewAllocaBeginOffset,
2749 uint64_t NewAllocaEndOffset,
bool IsIntegerPromotable,
2750 VectorType *PromotableVecTy,
2751 SmallSetVector<PHINode *, 8> &PHIUsers,
2752 SmallSetVector<SelectInst *, 8> &SelectUsers)
2753 :
DL(
DL), AS(AS),
Pass(
Pass), OldAI(OldAI), NewAI(NewAI),
2754 NewAllocaBeginOffset(NewAllocaBeginOffset),
2755 NewAllocaEndOffset(NewAllocaEndOffset), NewAllocaTy(NewAllocaTy),
2756 IntTy(IsIntegerPromotable
2759 DL.getTypeSizeInBits(NewAllocaTy).getFixedValue())
2761 VecTy(PromotableVecTy),
2762 ElementTy(VecTy ? VecTy->getElementType() : nullptr),
2763 ElementSize(VecTy ?
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8
2765 PHIUsers(PHIUsers), SelectUsers(SelectUsers),
2768 assert((
DL.getTypeSizeInBits(ElementTy).getFixedValue() % 8) == 0 &&
2769 "Only multiple-of-8 sized vector elements are viable");
2772 assert((!IntTy && !VecTy) || (IntTy && !VecTy) || (!IntTy && VecTy));
// Entry point for rewriting one slice of the old alloca. Sets up the
// per-slice state (offsets, splittability, clamped range, size, the use being
// rewritten) and positions the IR builder before dispatching.
// NOTE(review): several lines (including the dispatch and the return) are
// missing from this listing; only the visible setup is described.
2775 bool visit(AllocaSlices::const_iterator
I) {
2776 bool CanSROA =
true;
// Cache the slice's original byte range and whether it may be split.
2777 BeginOffset =
I->beginOffset();
2778 EndOffset =
I->endOffset();
2779 IsSplittable =
I->isSplittable();
// True when the slice extends outside the new alloca on either side.
// NOTE(review): the assignment target is on a line missing from this listing;
// the debug output below suggests it is IsSplit -- confirm.
2781 BeginOffset < NewAllocaBeginOffset || EndOffset > NewAllocaEndOffset;
2782 LLVM_DEBUG(
dbgs() <<
" rewriting " << (IsSplit ?
"split " :
""));
// The slice must overlap the new alloca's range.
2787 assert(BeginOffset < NewAllocaEndOffset);
2788 assert(EndOffset > NewAllocaBeginOffset);
// Clamp the slice to the portion that lies inside the new alloca.
2789 NewBeginOffset = std::max(BeginOffset, NewAllocaBeginOffset);
2790 NewEndOffset = std::min(EndOffset, NewAllocaEndOffset);
// Size in bytes of the clamped slice.
2792 SliceSize = NewEndOffset - NewBeginOffset;
2793 LLVM_DEBUG(
dbgs() <<
" Begin:(" << BeginOffset <<
", " << EndOffset
2794 <<
") NewBegin:(" << NewBeginOffset <<
", "
2795 << NewEndOffset <<
") NewAllocaBegin:("
2796 << NewAllocaBeginOffset <<
", " << NewAllocaEndOffset
// Only a split slice may have had its beginning clamped.
2798 assert(IsSplit || NewBeginOffset == BeginOffset);
2799 OldUse =
I->getUse();
// Emit replacement IR at the old user, reusing its debug location.
2803 IRB.SetInsertPoint(OldUserI);
2804 IRB.SetCurrentDebugLocation(OldUserI->
getDebugLoc());
// When value names are kept, prefix new names with "<NewAI name>.<BeginOffset>.".
2806 if (!IRB.getContext().shouldDiscardValueNames())
2807 IRB.getInserter().SetNamePrefix(Twine(NewAI.
getName()) +
"." +
2808 Twine(BeginOffset) +
".");
2870 std::optional<SmallVector<Value *, 4>>
2871 rewriteTreeStructuredMerge(Partition &
P) {
2873 if (
P.splitSliceTails().size() > 0)
2874 return std::nullopt;
2879 uint64_t BeginOffset;
2882 StoreInfo(StoreInst *SI, uint64_t Begin, uint64_t End,
Value *Val)
2883 :
Store(
SI), BeginOffset(Begin), EndOffset(End), StoredValue(Val) {}
2887 uint64_t BeginOffset;
2893 LoadInst *FullLoad =
nullptr;
2894 StoreInst *InitStore =
nullptr;
2898 Type *AllocatedEltTy =
2902 unsigned AllocatedEltTySize =
DL.getTypeSizeInBits(AllocatedEltTy);
2909 auto IsTypeValidForTreeStructuredMerge = [&](
Type *Ty) ->
bool {
2911 return FixedVecTy &&
2912 DL.getTypeSizeInBits(FixedVecTy->getElementType()) % 8 == 0 &&
2913 !FixedVecTy->getElementType()->isPointerTy();
2916 for (Slice &S :
P) {
2920 bool IsFullWidth = (S.beginOffset() == NewAllocaBeginOffset &&
2921 S.endOffset() == NewAllocaEndOffset);
2925 !IsTypeValidForTreeStructuredMerge(LI->
getType()))
2926 return std::nullopt;
2931 return std::nullopt;
2935 LoadInfos.
push_back({LI, S.beginOffset(), S.endOffset()});
2947 if (!
SI->isSimple() || !IsTypeValidForTreeStructuredMerge(
2948 SI->getValueOperand()->getType()))
2949 return std::nullopt;
2951 unsigned NumElts = StVecTy->getNumElements();
2952 unsigned EltSize =
DL.getTypeSizeInBits(StVecTy->getElementType());
2953 if (NumElts * EltSize % AllocatedEltTySize != 0)
2954 return std::nullopt;
2959 return std::nullopt;
2962 StoreInfos.
emplace_back(SI, S.beginOffset(), S.endOffset(),
2963 SI->getValueOperand());
2968 return std::nullopt;
2975 if (StoreInfos.
size() < 2)
2976 return std::nullopt;
2984 bool IsRMWPattern = InitStore && VecTy && !LoadInfos.
empty();
2985 bool IsStoresOnlyPattern = !InitStore && FullLoad && LoadInfos.
empty();
2986 if (!IsRMWPattern && !IsStoresOnlyPattern)
2987 return std::nullopt;
2991 BasicBlock *StoreBB = StoreInfos[0].Store->getParent();
2992 for (
auto &Info : StoreInfos)
2993 if (
Info.Store->getParent() != StoreBB)
2994 return std::nullopt;
2996 SmallVector<Value *, 4> DeletedValues;
3003 auto TreeMerge = [&](SmallVectorImpl<Value *> &Vals,
3006 while (Vals.
size() > 1) {
3007 SmallVector<Value *, 8>
Next;
3008 for (
unsigned I = 0,
E = Vals.
size();
I + 1 <
E;
I += 2) {
3014 if (Vals.
size() % 2 == 1)
3016 Vals = std::move(
Next);
3025 auto ReplaceFullLoad = [&](LoadInst *LoadToReplace,
Value *Merged) {
3027 Value *NewLoad = LoadBuilder.CreateAlignedLoad(
3028 Merged->getType(), &NewAI, getSliceAlign(),
3030 LoadToReplace->
getName() +
".sroa.new.load");
3032 NewLoad = LoadBuilder.CreateBitCast(NewLoad, LoadToReplace->
getType());
3037 if (IsStoresOnlyPattern) {
3040 llvm::sort(StoreInfos, [](
const StoreInfo &
A,
const StoreInfo &
B) {
3041 return A.BeginOffset <
B.BeginOffset;
3046 uint64_t Expected = NewAllocaBeginOffset;
3047 for (
auto &Info : StoreInfos) {
3048 if (
Info.BeginOffset != Expected)
3049 return std::nullopt;
3050 Expected =
Info.EndOffset;
3053 if (Expected != NewAllocaEndOffset)
3054 return std::nullopt;
3064 if (LoadBB == StoreBB) {
3065 for (
auto &Info : StoreInfos)
3066 if (!
Info.Store->comesBefore(FullLoad))
3067 return std::nullopt;
3071 dbgs() <<
"Tree structured merge rewrite (stores-only):\n";
3072 dbgs() <<
" Load: " << *FullLoad <<
"\n Ordered stores:\n";
3073 for (
auto [
I, Info] :
enumerate(StoreInfos)) {
3074 dbgs() <<
" [" <<
I <<
"] Range[" <<
Info.BeginOffset <<
", "
3075 <<
Info.EndOffset <<
") \tStore: " << *
Info.Store
3076 <<
"\tValue: " << *
Info.StoredValue <<
"\n";
3089 SmallVector<Value *, 8> Vals;
3090 for (
const auto &Info : StoreInfos) {
3095 Value *Merged = TreeMerge(Vals, Builder);
3096 Builder.CreateAlignedStore(Merged, &NewAI, getSliceAlign());
3099 ReplaceFullLoad(FullLoad, Merged);
3100 return DeletedValues;
3108 return std::nullopt;
3109 if (
any_of(LoadInfos, [&](
const LoadInfo &
I) {
3110 return I.Load->getParent() != StoreBB;
3112 return std::nullopt;
3124 uint64_t BeginOffset, EndOffset;
3128 Accesses.reserve(LoadInfos.
size() + StoreInfos.size());
3129 for (
const auto &L : LoadInfos)
3130 Accesses.push_back({
L.Load,
L.BeginOffset,
L.EndOffset,
false});
3131 for (
const auto &S : StoreInfos)
3132 Accesses.push_back({S.Store, S.BeginOffset, S.EndOffset,
true});
3134 return A.Inst->comesBefore(
B.Inst);
3142 return std::nullopt;
3148 if (FullLoad && FullLoad->
getParent() == StoreBB &&
3149 !
Accesses.back().Inst->comesBefore(FullLoad))
3150 return std::nullopt;
3161 using SliceRange = std::pair<uint64_t, uint64_t>;
3165 SortedRanges.
emplace_back(Acc.BeginOffset, Acc.EndOffset);
3169 uint64_t Expected = NewAllocaBeginOffset;
3170 for (
auto &
Range : SortedRanges) {
3171 if (
Range.first != Expected)
3172 return std::nullopt;
3173 Expected =
Range.second;
3175 if (Expected != NewAllocaEndOffset)
3176 return std::nullopt;
3179 dbgs() <<
"Tree structured merge rewrite (RMW):\n";
3180 dbgs() <<
" Init store: " << *InitStore <<
"\n";
3182 dbgs() <<
" Final load: " << *FullLoad <<
"\n";
3183 dbgs() <<
" Slice ranges (" << SortedRanges.size() <<
"):\n";
3184 for (
auto &
Range : SortedRanges)
3195 if (InitVec->
getType() != NewAllocaTy)
3196 InitVec = IRB.CreateBitCast(InitVec, NewAllocaTy,
"init.cast");
3197 DenseMap<SliceRange, Value *> SliceValues;
3198 for (
auto &
Range : SortedRanges) {
3199 unsigned BeginIdx = getIndex(
Range.first);
3200 unsigned EndIdx = getIndex(
Range.second);
3201 SliceValues[
Range] = IRB.CreateShuffleVector(
3217 SliceRange
Range{Acc.BeginOffset, Acc.EndOffset};
3220 if (
V->getType() != Acc.Inst->getType()) {
3222 V = IRB.CreateBitCast(V, Acc.Inst->getType());
3224 Acc.Inst->replaceAllUsesWith(V);
3241 SmallVector<Value *, 8> Vals;
3242 for (
auto &
Range : SortedRanges)
3244 Value *Merged = TreeMerge(Vals, Builder);
3245 Builder.CreateAlignedStore(Merged, &NewAI, getSliceAlign());
3250 ReplaceFullLoad(FullLoad, Merged);
3252 return DeletedValues;
3260 bool visitInstruction(Instruction &
I) {
3268 assert(IsSplit || BeginOffset == NewBeginOffset);
3269 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3271 StringRef OldName = OldPtr->
getName();
3273 size_t LastSROAPrefix = OldName.
rfind(
".sroa.");
3275 OldName = OldName.
substr(LastSROAPrefix + strlen(
".sroa."));
3280 OldName = OldName.
substr(IndexEnd + 1);
3284 OldName = OldName.
substr(OffsetEnd + 1);
3288 OldName = OldName.
substr(0, OldName.
find(
".sroa_"));
// Returns the alignment to use for accesses to the current slice.
// Only the tail of the computation is visible: it takes the slice's byte
// offset from the start of the new alloca as an argument.
// NOTE(review): the call receiving this offset is on a line missing from this
// listing -- confirm against the full source.
3300 Align getSliceAlign() {
3302 NewBeginOffset - NewAllocaBeginOffset);
// Converts a byte Offset (in the old alloca's offset space) into an element
// index of the vector-typed new alloca. Only valid when VecTy is set.
// NOTE(review): the return statement is missing from this listing; presumably
// the computed Index is returned.
3305 unsigned getIndex(uint64_t
Offset) {
3306 assert(VecTy &&
"Can only call getIndex when rewriting a vector");
// Rebase the offset so it is relative to the start of the new alloca.
3307 uint64_t RelOffset =
Offset - NewAllocaBeginOffset;
3308 assert(RelOffset / ElementSize < UINT32_MAX &&
"Index out of bounds");
3309 uint32_t
Index = RelOffset / ElementSize;
// The offset must land exactly on an element boundary.
3310 assert(Index * ElementSize == RelOffset);
// Queues an instruction on Pass.DeadInsts for later deletion.
// NOTE(review): the lines that derive I from V and perform the dead-ness check
// implied by the name are missing from this listing.
3314 void deleteIfTriviallyDead(
Value *V) {
3317 Pass.DeadInsts.push_back(
I);
// Rewrites a load of the current slice when the new alloca is a vector:
// loads the whole new alloca as NewAllocaTy, then extracts the elements
// [BeginIndex, EndIndex) that the slice covers.
3320 Value *rewriteVectorizedLoadInst(LoadInst &LI) {
// Element range of the slice within the new alloca's vector.
3321 unsigned BeginIndex = getIndex(NewBeginOffset);
3322 unsigned EndIndex = getIndex(NewEndOffset);
3323 assert(EndIndex > BeginIndex &&
"Empty vector!");
// Full-width load of the new alloca at the alloca's own alignment.
3326 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
// Carry over only the parallel-loop-access and access-group metadata.
3328 Load->copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3329 LLVMContext::MD_access_group});
3330 return extractVector(IRB, Load, BeginIndex, EndIndex,
"vec");
// Rewrites a load of the current slice when the new alloca is treated as a
// single wide integer (IntTy): loads the whole alloca, casts it to IntTy, and
// narrows/widens as needed for the slice.
// NOTE(review): the extraction call, the guard around the zext, and the return
// are on lines missing from this listing.
3333 Value *rewriteIntegerLoad(LoadInst &LI) {
3334 assert(IntTy &&
"We cannot insert an integer to the alloca");
// Full-width load of the new alloca, then reinterpret the bits as IntTy.
3337 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3338 V = IRB.CreateBitPreservingCastChain(
DL, V, IntTy);
3339 assert(NewBeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
// Byte offset of the slice inside the new alloca.
3340 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
// The slice covers only part of the alloca: an integer type of exactly
// SliceSize bytes is formed for the extracted piece.
3341 if (
Offset > 0 || NewEndOffset < NewAllocaEndOffset) {
3342 IntegerType *ExtractTy = Type::getIntNTy(LI.
getContext(), SliceSize * 8);
3351 "Can only handle an extract for an overly wide load");
// Zero-extend the value to the original load's type.
3353 V = IRB.CreateZExt(V, LI.
getType());
3357 bool visitLoadInst(LoadInst &LI) {
3366 Type *TargetTy = IsSplit ? Type::getIntNTy(LI.
getContext(), SliceSize * 8)
3368 bool IsPtrAdjusted =
false;
3371 V = rewriteVectorizedLoadInst(LI);
3373 V = rewriteIntegerLoad(LI);
3374 }
else if (NewBeginOffset == NewAllocaBeginOffset &&
3375 NewEndOffset == NewAllocaEndOffset &&
3378 DL.getTypeStoreSize(TargetTy).getFixedValue() > SliceSize &&
3381 getPtrToNewAI(LI.getPointerAddressSpace(), LI.isVolatile());
3382 LoadInst *NewLI = IRB.CreateAlignedLoad(
3383 NewAllocaTy, NewPtr, NewAI.getAlign(), LI.isVolatile(), LI.getName());
3384 if (LI.isVolatile())
3385 NewLI->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
3386 if (NewLI->isAtomic())
3387 NewLI->setAlignment(LI.getAlign());
3392 copyMetadataForLoad(*NewLI, LI);
3396 NewLI->setAAMetadata(AATags.adjustForAccess(
3397 NewBeginOffset - BeginOffset, NewLI->getType(), DL));
3405 if (auto *AITy = dyn_cast<IntegerType>(NewAllocaTy))
3406 if (auto *TITy = dyn_cast<IntegerType>(TargetTy))
3407 if (AITy->getBitWidth() < TITy->getBitWidth()) {
3408 V = IRB.CreateZExt(V, TITy,
"load.ext");
3409 if (DL.isBigEndian())
3410 V = IRB.CreateShl(V, TITy->getBitWidth() - AITy->getBitWidth(),
3414 Type *LTy = IRB.getPtrTy(AS);
3416 IRB.CreateAlignedLoad(TargetTy, getNewAllocaSlicePtr(IRB, LTy),
3421 NewBeginOffset - BeginOffset, NewLI->
getType(),
DL));
3425 NewLI->
copyMetadata(LI, {LLVMContext::MD_mem_parallel_loop_access,
3426 LLVMContext::MD_access_group});
3429 IsPtrAdjusted =
true;
3431 V = IRB.CreateBitPreservingCastChain(
DL, V, TargetTy);
3436 "Only integer type loads and stores are split");
3437 assert(SliceSize <
DL.getTypeStoreSize(LI.
getType()).getFixedValue() &&
3438 "Split load isn't smaller than original load");
3440 "Non-byte-multiple bit width");
3446 LIIt.setHeadBit(
true);
3447 IRB.SetInsertPoint(LI.
getParent(), LIIt);
3452 Value *Placeholder =
3458 Placeholder->replaceAllUsesWith(&LI);
3459 Placeholder->deleteValue();
3464 Pass.DeadInsts.push_back(&LI);
3465 deleteIfTriviallyDead(OldOp);
3470 bool rewriteVectorizedStoreInst(
Value *V, StoreInst &SI,
Value *OldOp,
3475 if (
V->getType() != VecTy) {
3476 unsigned BeginIndex = getIndex(NewBeginOffset);
3477 unsigned EndIndex = getIndex(NewEndOffset);
3478 assert(EndIndex > BeginIndex &&
"Empty vector!");
3479 unsigned NumElements = EndIndex - BeginIndex;
3481 "Too many elements!");
3482 Type *SliceTy = (NumElements == 1)
3484 : FixedVectorType::
get(ElementTy, NumElements);
3485 if (
V->getType() != SliceTy)
3486 V = IRB.CreateBitPreservingCastChain(
DL, V, SliceTy);
3490 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3493 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3494 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3495 LLVMContext::MD_access_group});
3499 Pass.DeadInsts.push_back(&SI);
3503 Store,
Store->getPointerOperand(), OrigV,
DL);
3508 bool rewriteIntegerStore(
Value *V, StoreInst &SI, AAMDNodes AATags) {
3509 assert(IntTy &&
"We cannot extract an integer from the alloca");
3511 if (
DL.getTypeSizeInBits(
V->getType()).getFixedValue() !=
3513 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3515 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3516 assert(BeginOffset >= NewAllocaBeginOffset &&
"Out of bounds offset");
3517 uint64_t
Offset = BeginOffset - NewAllocaBeginOffset;
3520 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3521 StoreInst *
Store = IRB.CreateAlignedStore(V, &NewAI, NewAI.
getAlign());
3522 Store->copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3523 LLVMContext::MD_access_group});
3529 Store,
Store->getPointerOperand(),
3530 Store->getValueOperand(),
DL);
3532 Pass.DeadInsts.push_back(&SI);
3537 bool visitStoreInst(StoreInst &SI) {
3539 Value *OldOp =
SI.getOperand(1);
3542 AAMDNodes AATags =
SI.getAAMetadata();
3547 if (
V->getType()->isPointerTy())
3549 Pass.PostPromotionWorklist.insert(AI);
3551 TypeSize StoreSize =
DL.getTypeStoreSize(
V->getType());
3554 assert(
V->getType()->isIntegerTy() &&
3555 "Only integer type loads and stores are split");
3556 assert(
DL.typeSizeEqualsStoreSize(
V->getType()) &&
3557 "Non-byte-multiple bit width");
3558 IntegerType *NarrowTy = Type::getIntNTy(
SI.getContext(), SliceSize * 8);
3564 return rewriteVectorizedStoreInst(V, SI, OldOp, AATags);
3565 if (IntTy &&
V->getType()->isIntegerTy())
3566 return rewriteIntegerStore(V, SI, AATags);
3569 if (NewBeginOffset == NewAllocaBeginOffset &&
3570 NewEndOffset == NewAllocaEndOffset &&
3572 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3574 getPtrToNewAI(
SI.getPointerAddressSpace(),
SI.isVolatile());
3577 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
SI.isVolatile());
3579 unsigned AS =
SI.getPointerAddressSpace();
3580 Value *NewPtr = getNewAllocaSlicePtr(IRB, IRB.getPtrTy(AS));
3582 IRB.CreateAlignedStore(V, NewPtr, getSliceAlign(),
SI.isVolatile());
3584 NewSI->
copyMetadata(SI, {LLVMContext::MD_mem_parallel_loop_access,
3585 LLVMContext::MD_access_group});
3589 if (
SI.isVolatile())
3598 Pass.DeadInsts.push_back(&SI);
3599 deleteIfTriviallyDead(OldOp);
3617 assert(
Size > 0 &&
"Expected a positive number of bytes.");
3625 IRB.CreateZExt(V, SplatIntTy,
"zext"),
3635 V = IRB.CreateVectorSplat(NumElements, V,
"vsplat");
3640 bool visitMemSetInst(MemSetInst &
II) {
3644 AAMDNodes AATags =
II.getAAMetadata();
3650 assert(NewBeginOffset == BeginOffset);
3651 II.setDest(getNewAllocaSlicePtr(IRB, OldPtr->
getType()));
3652 II.setDestAlignment(getSliceAlign());
3657 "AT: Unexpected link to non-const GEP");
3658 deleteIfTriviallyDead(OldPtr);
3663 Pass.DeadInsts.push_back(&
II);
3667 const bool CanContinue = [&]() {
3670 if (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset)
3674 const uint64_t
Len =
C->getLimitedValue();
3675 if (Len > std::numeric_limits<unsigned>::max())
3677 auto *Int8Ty = IntegerType::getInt8Ty(NewAI.
getContext());
3680 DL.isLegalInteger(
DL.getTypeSizeInBits(ScalarTy).getFixedValue());
3686 Type *SizeTy =
II.getLength()->getType();
3687 unsigned Sz = NewEndOffset - NewBeginOffset;
3690 getNewAllocaSlicePtr(IRB, OldPtr->
getType()),
II.getValue(),
Size,
3691 MaybeAlign(getSliceAlign()),
II.isVolatile()));
3697 New,
New->getRawDest(),
nullptr,
DL);
3712 assert(ElementTy == ScalarTy);
3714 unsigned BeginIndex = getIndex(NewBeginOffset);
3715 unsigned EndIndex = getIndex(NewEndOffset);
3716 assert(EndIndex > BeginIndex &&
"Empty vector!");
3717 unsigned NumElements = EndIndex - BeginIndex;
3719 "Too many elements!");
3722 II.getValue(),
DL.getTypeSizeInBits(ElementTy).getFixedValue() / 8);
3723 Splat = IRB.CreateBitPreservingCastChain(
DL,
Splat, ElementTy);
3724 if (NumElements > 1)
3727 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3735 uint64_t
Size = NewEndOffset - NewBeginOffset;
3736 V = getIntegerSplat(
II.getValue(),
Size);
3738 if (IntTy && (NewBeginOffset != NewAllocaBeginOffset ||
3739 NewEndOffset != NewAllocaEndOffset)) {
3740 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI,
3742 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3743 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3746 assert(
V->getType() == IntTy &&
3747 "Wrong type for an alloca wide integer!");
3749 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3752 assert(NewBeginOffset == NewAllocaBeginOffset);
3753 assert(NewEndOffset == NewAllocaEndOffset);
3755 V = getIntegerSplat(
II.getValue(),
3756 DL.getTypeSizeInBits(ScalarTy).getFixedValue() / 8);
3761 V = IRB.CreateBitPreservingCastChain(
DL, V, NewAllocaTy);
3764 Value *NewPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3766 IRB.CreateAlignedStore(V, NewPtr, NewAI.
getAlign(),
II.isVolatile());
3767 New->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3768 LLVMContext::MD_access_group});
3774 New,
New->getPointerOperand(), V,
DL);
3777 return !
II.isVolatile();
3780 bool visitMemTransferInst(MemTransferInst &
II) {
3786 AAMDNodes AATags =
II.getAAMetadata();
3788 bool IsDest = &
II.getRawDestUse() == OldUse;
3789 assert((IsDest &&
II.getRawDest() == OldPtr) ||
3790 (!IsDest &&
II.getRawSource() == OldPtr));
3792 Align SliceAlign = getSliceAlign();
3800 if (!IsSplittable) {
3801 Value *AdjustedPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3806 DbgAssign->getAddress() ==
II.getDest())
3807 DbgAssign->replaceVariableLocationOp(
II.getDest(), AdjustedPtr);
3809 II.setDest(AdjustedPtr);
3810 II.setDestAlignment(SliceAlign);
3812 II.setSource(AdjustedPtr);
3813 II.setSourceAlignment(SliceAlign);
3817 deleteIfTriviallyDead(OldPtr);
3830 (BeginOffset > NewAllocaBeginOffset || EndOffset < NewAllocaEndOffset ||
3831 SliceSize !=
DL.getTypeStoreSize(NewAllocaTy).getFixedValue() ||
3832 !
DL.typeSizeEqualsStoreSize(NewAllocaTy) ||
3838 if (EmitMemCpy && &OldAI == &NewAI) {
3840 assert(NewBeginOffset == BeginOffset);
3843 if (NewEndOffset != EndOffset)
3844 II.setLength(NewEndOffset - NewBeginOffset);
3848 Pass.DeadInsts.push_back(&
II);
3852 Value *OtherPtr = IsDest ?
II.getRawSource() :
II.getRawDest();
3853 if (AllocaInst *AI =
3855 assert(AI != &OldAI && AI != &NewAI &&
3856 "Splittable transfers cannot reach the same alloca on both ends.");
3857 Pass.Worklist.insert(AI);
3864 unsigned OffsetWidth =
DL.getIndexSizeInBits(OtherAS);
3865 APInt OtherOffset(OffsetWidth, NewBeginOffset - BeginOffset);
3867 (IsDest ?
II.getSourceAlign() :
II.getDestAlign()).valueOrOne();
3869 commonAlignment(OtherAlign, OtherOffset.zextOrTrunc(64).getZExtValue());
3877 Value *OurPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
3878 Type *SizeTy =
II.getLength()->getType();
3879 Constant *
Size = ConstantInt::get(SizeTy, NewEndOffset - NewBeginOffset);
3881 Value *DestPtr, *SrcPtr;
3882 MaybeAlign DestAlign, SrcAlign;
3886 DestAlign = SliceAlign;
3888 SrcAlign = OtherAlign;
3891 DestAlign = OtherAlign;
3893 SrcAlign = SliceAlign;
3895 CallInst *
New = IRB.CreateMemCpy(DestPtr, DestAlign, SrcPtr, SrcAlign,
3898 New->setAAMetadata(AATags.
shift(NewBeginOffset - BeginOffset));
3903 &
II, New, DestPtr,
nullptr,
DL);
3908 SliceSize * 8, &
II, New, DestPtr,
nullptr,
DL);
3914 bool IsWholeAlloca = NewBeginOffset == NewAllocaBeginOffset &&
3915 NewEndOffset == NewAllocaEndOffset;
3916 uint64_t
Size = NewEndOffset - NewBeginOffset;
3917 unsigned BeginIndex = VecTy ? getIndex(NewBeginOffset) : 0;
3918 unsigned EndIndex = VecTy ? getIndex(NewEndOffset) : 0;
3919 unsigned NumElements = EndIndex - BeginIndex;
3920 IntegerType *SubIntTy =
3921 IntTy ? Type::getIntNTy(IntTy->
getContext(),
Size * 8) : nullptr;
3926 if (VecTy && !IsWholeAlloca) {
3927 if (NumElements == 1)
3928 OtherTy = VecTy->getElementType();
3931 }
else if (IntTy && !IsWholeAlloca) {
3934 OtherTy = NewAllocaTy;
3939 MaybeAlign SrcAlign = OtherAlign;
3940 MaybeAlign DstAlign = SliceAlign;
3948 DstPtr = getPtrToNewAI(
II.getDestAddressSpace(),
II.isVolatile());
3952 SrcPtr = getPtrToNewAI(
II.getSourceAddressSpace(),
II.isVolatile());
3956 if (VecTy && !IsWholeAlloca && !IsDest) {
3958 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3960 }
else if (IntTy && !IsWholeAlloca && !IsDest) {
3962 IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
"load");
3963 Src = IRB.CreateBitPreservingCastChain(
DL, Src, IntTy);
3964 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3967 LoadInst *
Load = IRB.CreateAlignedLoad(OtherTy, SrcPtr, SrcAlign,
3968 II.isVolatile(),
"copyload");
3969 Load->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3970 LLVMContext::MD_access_group});
3977 if (VecTy && !IsWholeAlloca && IsDest) {
3978 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3981 }
else if (IntTy && !IsWholeAlloca && IsDest) {
3982 Value *Old = IRB.CreateAlignedLoad(NewAllocaTy, &NewAI, NewAI.
getAlign(),
3984 Old = IRB.CreateBitPreservingCastChain(
DL, Old, IntTy);
3985 uint64_t
Offset = NewBeginOffset - NewAllocaBeginOffset;
3987 Src = IRB.CreateBitPreservingCastChain(
DL, Src, NewAllocaTy);
3991 IRB.CreateAlignedStore(Src, DstPtr, DstAlign,
II.isVolatile()));
3992 Store->copyMetadata(
II, {LLVMContext::MD_mem_parallel_loop_access,
3993 LLVMContext::MD_access_group});
3996 Src->getType(),
DL));
4002 Store, DstPtr, Src,
DL);
4007 &
II, Store, DstPtr, Src,
DL);
4011 return !
II.isVolatile();
// Rewrites an intrinsic that uses the old alloca pointer. Only lifetime
// start/end markers and droppable intrinsics are expected here.
// NOTE(review): the droppable branch body, the computation of Ptr, and the
// return are on lines missing from this listing.
4014 bool visitIntrinsicInst(IntrinsicInst &
II) {
4015 assert((
II.isLifetimeStartOrEnd() ||
II.isDroppable()) &&
4016 "Unexpected intrinsic!");
// The original intrinsic is always queued for deletion.
4020 Pass.DeadInsts.push_back(&
II);
// The only droppable intrinsic expected is llvm.assume.
4022 if (
II.isDroppable()) {
4023 assert(
II.getIntrinsicID() == Intrinsic::assume &&
"Expected assume");
4029 assert(
II.getArgOperand(0) == OldPtr);
// Re-create the matching lifetime marker on the new pointer.
4033 if (
II.getIntrinsicID() == Intrinsic::lifetime_start)
4034 New = IRB.CreateLifetimeStart(Ptr);
4036 New = IRB.CreateLifetimeEnd(Ptr);
// Walks the instructions reachable from Root through use edges, using a
// worklist (Uses) plus a Visited set, and adjusts alignment on the memory
// accesses it finds. For stores, the alignment is lowered to the smaller of
// the store's current alignment and the slice alignment.
// NOTE(review): the load handling and the traversal filter are on lines
// missing from this listing.
4044 void fixLoadStoreAlign(Instruction &Root) {
4048 SmallPtrSet<Instruction *, 4> Visited;
4049 SmallVector<Instruction *, 4>
Uses;
// Seed the worklist with the root instruction.
4051 Uses.push_back(&Root);
// Never raise a store's alignment; only clamp it down to the slice alignment.
4060 SI->setAlignment(std::min(
SI->getAlign(), getSliceAlign()));
4067 for (User *U :
I->users())
4070 }
while (!
Uses.empty());
// Rewrites a PHI node that uses the old alloca pointer so that it refers to a
// pointer into the new alloca instead. PHI slices must lie entirely within
// the new alloca.
// NOTE(review): the insert-point selection condition, the operand
// replacement, and the return are on lines missing from this listing.
4073 bool visitPHINode(PHINode &PN) {
4075 assert(BeginOffset >= NewAllocaBeginOffset &&
"PHIs are unsplittable");
4076 assert(EndOffset <= NewAllocaEndOffset &&
"PHIs are unsplittable");
// Restore the builder's insertion point when this scope ends.
4082 IRBuilderBase::InsertPointGuard Guard(IRB);
4085 OldPtr->
getParent()->getFirstInsertionPt());
4087 IRB.SetInsertPoint(OldPtr);
4088 IRB.SetCurrentDebugLocation(OldPtr->
getDebugLoc());
// Pointer to this slice within the new alloca, typed like the old pointer.
4090 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
4095 deleteIfTriviallyDead(OldPtr);
// Alignment of accesses through the PHI may no longer hold; fix them up.
4098 fixLoadStoreAlign(PN);
// Rewrites a select that has the old alloca pointer as its true and/or false
// value so that it uses a pointer into the new alloca instead. Select slices
// must lie entirely within the new alloca.
// NOTE(review): the return (and any bookkeeping before it) is on lines
// missing from this listing.
4107 bool visitSelectInst(SelectInst &SI) {
4109 assert((
SI.getTrueValue() == OldPtr ||
SI.getFalseValue() == OldPtr) &&
4110 "Pointer isn't an operand!");
4111 assert(BeginOffset >= NewAllocaBeginOffset &&
"Selects are unsplittable");
4112 assert(EndOffset <= NewAllocaEndOffset &&
"Selects are unsplittable");
// Pointer to this slice within the new alloca, typed like the old pointer.
4114 Value *NewPtr = getNewAllocaSlicePtr(IRB, OldPtr->
getType());
// Replace whichever value operands (1 = true, 2 = false) were the old pointer.
4116 if (
SI.getOperand(1) == OldPtr)
4117 SI.setOperand(1, NewPtr);
4118 if (
SI.getOperand(2) == OldPtr)
4119 SI.setOperand(2, NewPtr);
4122 deleteIfTriviallyDead(OldPtr);
// Alignment of accesses through the select may no longer hold; fix them up.
4125 fixLoadStoreAlign(SI);
4140class AggLoadStoreRewriter :
public InstVisitor<AggLoadStoreRewriter, bool> {
4142 friend class InstVisitor<AggLoadStoreRewriter, bool>;
4148 SmallPtrSet<User *, 8> Visited;
4155 const DataLayout &
DL;
// Constructs the rewriter, storing the data layout and the IR builder it will
// use.
4160 AggLoadStoreRewriter(
const DataLayout &
DL, IRBuilderTy &IRB)
4161 :
DL(
DL), IRB(IRB) {}
// Drives the rewrite starting from instruction I: repeatedly pops a pending
// use from Queue into U until the queue is empty.
// NOTE(review): the initial enqueue, the per-use visit, and the return are on
// lines missing from this listing.
4165 bool rewrite(Instruction &
I) {
4169 while (!
Queue.empty()) {
4170 U =
Queue.pop_back_val();
// Adds each use of I to Queue, but only the first time its user is seen:
// Visited.insert(...).second is true only for a newly inserted user.
4179 void enqueueUsers(Instruction &
I) {
4180 for (Use &U :
I.uses())
4181 if (Visited.
insert(
U.getUser()).second)
4182 Queue.push_back(&U);
// Fallback visitor for instructions with no dedicated handler: does nothing
// and reports that nothing was changed.
4186 bool visitInstruction(Instruction &
I) {
return false; }
4189 template <
typename Derived>
class OpSplitter {
4196 SmallVector<unsigned, 4> Indices;
4200 SmallVector<Value *, 4> GEPIndices;
4214 const DataLayout &
DL;
4218 OpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4219 Align BaseAlign,
const DataLayout &
DL, IRBuilderTy &IRB)
4220 : IRB(IRB), GEPIndices(1, IRB.getInt32(0)), Ptr(Ptr), BaseTy(BaseTy),
4221 BaseAlign(BaseAlign),
DL(
DL) {
4222 IRB.SetInsertPoint(InsertionPoint);
4239 void emitSplitOps(
Type *Ty,
Value *&Agg,
const Twine &Name) {
4241 unsigned Offset =
DL.getIndexedOffsetInType(BaseTy, GEPIndices);
4242 return static_cast<Derived *
>(
this)->emitFunc(
4247 unsigned OldSize = Indices.
size();
4249 for (
unsigned Idx = 0,
Size = ATy->getNumElements(); Idx !=
Size;
4251 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4253 GEPIndices.
push_back(IRB.getInt32(Idx));
4254 emitSplitOps(ATy->getElementType(), Agg, Name +
"." + Twine(Idx));
4262 unsigned OldSize = Indices.
size();
4264 for (
unsigned Idx = 0,
Size = STy->getNumElements(); Idx !=
Size;
4266 assert(Indices.
size() == OldSize &&
"Did not return to the old size");
4268 GEPIndices.
push_back(IRB.getInt32(Idx));
4269 emitSplitOps(STy->getElementType(Idx), Agg, Name +
"." + Twine(Idx));
4280 struct LoadOpSplitter :
public OpSplitter<LoadOpSplitter> {
4284 SmallVector<Value *, 4> Components;
4289 LoadOpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4290 AAMDNodes AATags, Align BaseAlign,
const DataLayout &
DL,
4292 : OpSplitter<LoadOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
DL,
4298 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4302 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name +
".gep");
4304 IRB.CreateAlignedLoad(Ty,
GEP, Alignment, Name +
".load");
4310 Load->setAAMetadata(
4316 Agg = IRB.CreateInsertValue(Agg, Load, Indices, Name +
".insert");
4321 void recordFakeUses(LoadInst &LI) {
4322 for (Use &U : LI.
uses())
4324 if (
II->getIntrinsicID() == Intrinsic::fake_use)
// For every recorded fake-use instruction, emits one llvm.fake.use call per
// entry of Components at that instruction's position, then erases the
// original instruction.
4330 void emitFakeUses() {
4331 for (Instruction *
I : FakeUses) {
// Insert the replacement fake uses where the original one was.
4332 IRB.SetInsertPoint(
I);
4333 for (
auto *V : Components)
4334 IRB.CreateIntrinsic(Intrinsic::fake_use, {
V});
4335 I->eraseFromParent();
4340 bool visitLoadInst(LoadInst &LI) {
4349 Splitter.recordFakeUses(LI);
4352 Splitter.emitFakeUses();
4359 struct StoreOpSplitter :
public OpSplitter<StoreOpSplitter> {
4360 StoreOpSplitter(Instruction *InsertionPoint,
Value *Ptr,
Type *BaseTy,
4361 AAMDNodes AATags, StoreInst *AggStore, Align BaseAlign,
4362 const DataLayout &
DL, IRBuilderTy &IRB)
4363 : OpSplitter<StoreOpSplitter>(InsertionPoint, Ptr, BaseTy, BaseAlign,
4365 AATags(AATags), AggStore(AggStore) {}
4367 StoreInst *AggStore;
4370 void emitFunc(
Type *Ty,
Value *&Agg, Align Alignment,
const Twine &Name) {
4376 Value *ExtractValue =
4377 IRB.CreateExtractValue(Agg, Indices, Name +
".extract");
4378 Value *InBoundsGEP =
4379 IRB.CreateInBoundsGEP(BaseTy, Ptr, GEPIndices, Name +
".gep");
4381 IRB.CreateAlignedStore(ExtractValue, InBoundsGEP, Alignment);
4397 uint64_t SizeInBits =
4398 DL.getTypeSizeInBits(
Store->getValueOperand()->getType());
4400 SizeInBits, AggStore, Store,
4401 Store->getPointerOperand(),
Store->getValueOperand(),
4405 "AT: unexpected debug.assign linked to store through "
// Splits a store of an aggregate value through the current use (*U) into
// per-component operations via StoreOpSplitter, then erases the original
// store. The guards below test for non-simple stores, stores whose pointer
// operand is not the current use, and single-value stored types.
// NOTE(review): the guard bodies, the definition of V, and the return are on
// lines missing from this listing.
4412 bool visitStoreInst(StoreInst &SI) {
4413 if (!
SI.isSimple() ||
SI.getPointerOperand() != *U)
4416 if (
V->getType()->isSingleValueType())
// Emit the split operations immediately before the original store.
4421 StoreOpSplitter Splitter(&SI, *U,
V->getType(),
SI.getAAMetadata(), &SI,
4423 Splitter.emitSplitOps(
V->getType(), V,
V->getName() +
".fca");
4428 SI.eraseFromParent();
4432 bool visitBitCastInst(BitCastInst &BC) {
4437 bool visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
4447 bool unfoldGEPSelect(GetElementPtrInst &GEPI) {
4466 if (!ZI->getSrcTy()->isIntegerTy(1))
4479 dbgs() <<
" original: " << *Sel <<
"\n";
4480 dbgs() <<
" " << GEPI <<
"\n";);
4482 auto GetNewOps = [&](
Value *SelOp) {
4495 Cond =
SI->getCondition();
4496 True =
SI->getTrueValue();
4497 False =
SI->getFalseValue();
4501 Cond = Sel->getOperand(0);
4502 True = ConstantInt::get(Sel->getType(), 1);
4503 False = ConstantInt::get(Sel->getType(), 0);
4508 IRB.SetInsertPoint(&GEPI);
4512 Value *NTrue = IRB.CreateGEP(Ty, TrueOps[0],
ArrayRef(TrueOps).drop_front(),
4513 True->
getName() +
".sroa.gep", NW);
4516 IRB.CreateGEP(Ty, FalseOps[0],
ArrayRef(FalseOps).drop_front(),
4517 False->
getName() +
".sroa.gep", NW);
4519 Value *NSel = MDFrom
4520 ? IRB.CreateSelect(
Cond, NTrue, NFalse,
4521 Sel->getName() +
".sroa.sel", MDFrom)
4522 : IRB.CreateSelectWithUnknownProfile(
4524 Sel->getName() +
".sroa.sel");
4525 Visited.
erase(&GEPI);
4530 enqueueUsers(*NSelI);
4533 dbgs() <<
" " << *NFalse <<
"\n";
4534 dbgs() <<
" " << *NSel <<
"\n";);
4543 bool unfoldGEPPhi(GetElementPtrInst &GEPI) {
4548 auto IsInvalidPointerOperand = [](
Value *
V) {
4552 return !AI->isStaticAlloca();
4556 if (
any_of(
Phi->operands(), IsInvalidPointerOperand))
4571 [](
Value *V) { return isa<ConstantInt>(V); }))
4584 dbgs() <<
" original: " << *
Phi <<
"\n";
4585 dbgs() <<
" " << GEPI <<
"\n";);
4587 auto GetNewOps = [&](
Value *PhiOp) {
4597 IRB.SetInsertPoint(Phi);
4598 PHINode *NewPhi = IRB.CreatePHI(GEPI.
getType(),
Phi->getNumIncomingValues(),
4599 Phi->getName() +
".sroa.phi");
4605 for (
unsigned I = 0,
E =
Phi->getNumIncomingValues();
I !=
E; ++
I) {
4614 IRB.CreateGEP(SourceTy, NewOps[0],
ArrayRef(NewOps).drop_front(),
4620 Visited.
erase(&GEPI);
4624 enqueueUsers(*NewPhi);
4630 dbgs() <<
"\n " << *NewPhi <<
'\n');
4635 bool visitGetElementPtrInst(GetElementPtrInst &GEPI) {
4636 if (unfoldGEPSelect(GEPI))
4639 if (unfoldGEPPhi(GEPI))
4646 bool visitPHINode(PHINode &PN) {
4651 bool visitSelectInst(SelectInst &SI) {
4665 if (Ty->isSingleValueType())
4668 uint64_t AllocSize =
DL.getTypeAllocSize(Ty).getFixedValue();
4673 InnerTy = ArrTy->getElementType();
4677 InnerTy = STy->getElementType(Index);
4682 if (AllocSize >
DL.getTypeAllocSize(InnerTy).getFixedValue() ||
4683 TypeSize >
DL.getTypeSizeInBits(InnerTy).getFixedValue())
4704 if (
Offset == 0 &&
DL.getTypeAllocSize(Ty).getFixedValue() ==
Size)
4706 if (
Offset >
DL.getTypeAllocSize(Ty).getFixedValue() ||
4707 (
DL.getTypeAllocSize(Ty).getFixedValue() -
Offset) <
Size)
4714 ElementTy = AT->getElementType();
4715 TyNumElements = AT->getNumElements();
4720 ElementTy = VT->getElementType();
4721 TyNumElements = VT->getNumElements();
4723 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4725 if (NumSkippedElements >= TyNumElements)
4727 Offset -= NumSkippedElements * ElementSize;
4739 if (
Size == ElementSize)
4743 if (NumElements * ElementSize !=
Size)
4767 uint64_t ElementSize =
DL.getTypeAllocSize(ElementTy).getFixedValue();
4768 if (
Offset >= ElementSize)
4779 if (
Size == ElementSize)
4786 if (Index == EndIndex)
4796 assert(Index < EndIndex);
4835bool SROA::presplitLoadsAndStores(AllocaInst &AI, AllocaSlices &AS) {
4849 struct SplitOffsets {
4851 std::vector<uint64_t> Splits;
4853 SmallDenseMap<Instruction *, SplitOffsets, 8> SplitOffsetsMap;
4866 SmallPtrSet<LoadInst *, 8> UnsplittableLoads;
4868 LLVM_DEBUG(
dbgs() <<
" Searching for candidate loads and stores\n");
4869 for (
auto &
P : AS.partitions()) {
4870 for (Slice &S :
P) {
4872 if (!S.isSplittable() || S.endOffset() <=
P.endOffset()) {
4877 UnsplittableLoads.
insert(LI);
4880 UnsplittableLoads.
insert(LI);
4883 assert(
P.endOffset() > S.beginOffset() &&
4884 "Empty or backwards partition!");
4893 auto IsLoadSimplyStored = [](LoadInst *LI) {
4894 for (User *LU : LI->
users()) {
4896 if (!SI || !
SI->isSimple())
4901 if (!IsLoadSimplyStored(LI)) {
4902 UnsplittableLoads.
insert(LI);
4908 if (S.getUse() != &
SI->getOperandUse(
SI->getPointerOperandIndex()))
4912 if (!StoredLoad || !StoredLoad->isSimple())
4914 assert(!
SI->isVolatile() &&
"Cannot split volatile stores!");
4924 auto &
Offsets = SplitOffsetsMap[
I];
4926 "Should not have splits the first time we see an instruction!");
4928 Offsets.Splits.push_back(
P.endOffset() - S.beginOffset());
4933 for (Slice *S :
P.splitSliceTails()) {
4934 auto SplitOffsetsMapI =
4936 if (SplitOffsetsMapI == SplitOffsetsMap.
end())
4938 auto &
Offsets = SplitOffsetsMapI->second;
4942 "Cannot have an empty set of splits on the second partition!");
4944 P.beginOffset() -
Offsets.S->beginOffset() &&
4945 "Previous split does not end where this one begins!");
4949 if (S->endOffset() >
P.endOffset())
4958 llvm::erase_if(Stores, [&UnsplittableLoads, &SplitOffsetsMap](StoreInst *SI) {
4964 if (UnsplittableLoads.
count(LI))
4967 auto LoadOffsetsI = SplitOffsetsMap.
find(LI);
4968 if (LoadOffsetsI == SplitOffsetsMap.
end())
4970 auto &LoadOffsets = LoadOffsetsI->second;
4973 auto &StoreOffsets = SplitOffsetsMap[
SI];
4978 if (LoadOffsets.Splits == StoreOffsets.Splits)
4982 <<
" " << *LI <<
"\n"
4983 <<
" " << *SI <<
"\n");
4989 UnsplittableLoads.
insert(LI);
4998 return UnsplittableLoads.
count(LI);
5003 return UnsplittableLoads.
count(LI);
5013 IRBuilderTy IRB(&AI);
5020 SmallPtrSet<AllocaInst *, 4> ResplitPromotableAllocas;
5030 SmallDenseMap<LoadInst *, std::vector<LoadInst *>, 1> SplitLoadsMap;
5031 std::vector<LoadInst *> SplitLoads;
5032 const DataLayout &
DL = AI.getDataLayout();
5033 for (LoadInst *LI : Loads) {
5036 auto &
Offsets = SplitOffsetsMap[LI];
5037 unsigned SliceSize =
Offsets.S->endOffset() -
Offsets.S->beginOffset();
5039 "Load must have type size equal to store size");
5041 "Load must be >= slice size");
5043 uint64_t BaseOffset =
Offsets.S->beginOffset();
5044 assert(BaseOffset + SliceSize > BaseOffset &&
5045 "Cannot represent alloca access size using 64-bit integers!");
5048 IRB.SetInsertPoint(LI);
5052 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
5055 auto *PartTy = Type::getIntNTy(LI->
getContext(), PartSize * 8);
5058 LoadInst *PLoad = IRB.CreateAlignedLoad(
5061 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
5062 PartPtrTy,
BasePtr->getName() +
"."),
5065 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5066 LLVMContext::MD_access_group});
5070 SplitLoads.push_back(PLoad);
5074 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5078 <<
", " << NewSlices.
back().endOffset()
5079 <<
"): " << *PLoad <<
"\n");
5086 PartOffset =
Offsets.Splits[Idx];
5088 PartSize = (Idx <
Size ?
Offsets.Splits[Idx] : SliceSize) - PartOffset;
5094 bool DeferredStores =
false;
5095 for (User *LU : LI->
users()) {
5097 if (!Stores.
empty() && SplitOffsetsMap.
count(SI)) {
5098 DeferredStores =
true;
5104 Value *StoreBasePtr =
SI->getPointerOperand();
5105 IRB.SetInsertPoint(SI);
5106 AAMDNodes AATags =
SI->getAAMetadata();
5108 LLVM_DEBUG(
dbgs() <<
" Splitting store of load: " << *SI <<
"\n");
5110 for (
int Idx = 0,
Size = SplitLoads.size(); Idx <
Size; ++Idx) {
5111 LoadInst *PLoad = SplitLoads[Idx];
5112 uint64_t PartOffset = Idx == 0 ? 0 :
Offsets.Splits[Idx - 1];
5113 auto *PartPtrTy =
SI->getPointerOperandType();
5115 auto AS =
SI->getPointerAddressSpace();
5116 StoreInst *PStore = IRB.CreateAlignedStore(
5119 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
5120 PartPtrTy, StoreBasePtr->
getName() +
"."),
5123 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5124 LLVMContext::MD_access_group,
5125 LLVMContext::MD_DIAssignID});
5130 LLVM_DEBUG(
dbgs() <<
" +" << PartOffset <<
":" << *PStore <<
"\n");
5138 ResplitPromotableAllocas.
insert(OtherAI);
5139 Worklist.insert(OtherAI);
5142 Worklist.insert(OtherAI);
5146 DeadInsts.push_back(SI);
5151 SplitLoadsMap.
insert(std::make_pair(LI, std::move(SplitLoads)));
5154 DeadInsts.push_back(LI);
5163 for (StoreInst *SI : Stores) {
5168 assert(StoreSize > 0 &&
"Cannot have a zero-sized integer store!");
5172 "Slice size should always match load size exactly!");
5173 uint64_t BaseOffset =
Offsets.S->beginOffset();
5174 assert(BaseOffset + StoreSize > BaseOffset &&
5175 "Cannot represent alloca access size using 64-bit integers!");
5183 auto SplitLoadsMapI = SplitLoadsMap.
find(LI);
5184 std::vector<LoadInst *> *SplitLoads =
nullptr;
5185 if (SplitLoadsMapI != SplitLoadsMap.
end()) {
5186 SplitLoads = &SplitLoadsMapI->second;
5188 "Too few split loads for the number of splits in the store!");
5193 uint64_t PartOffset = 0, PartSize =
Offsets.Splits.front();
5196 auto *PartTy = Type::getIntNTy(Ty->
getContext(), PartSize * 8);
5198 auto *StorePartPtrTy =
SI->getPointerOperandType();
5203 PLoad = (*SplitLoads)[Idx];
5205 IRB.SetInsertPoint(LI);
5207 PLoad = IRB.CreateAlignedLoad(
5210 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
5211 LoadPartPtrTy, LoadBasePtr->
getName() +
"."),
5214 PLoad->
copyMetadata(*LI, {LLVMContext::MD_mem_parallel_loop_access,
5215 LLVMContext::MD_access_group});
5219 IRB.SetInsertPoint(SI);
5220 auto AS =
SI->getPointerAddressSpace();
5221 StoreInst *PStore = IRB.CreateAlignedStore(
5224 APInt(
DL.getIndexSizeInBits(AS), PartOffset),
5225 StorePartPtrTy, StoreBasePtr->
getName() +
"."),
5228 PStore->
copyMetadata(*SI, {LLVMContext::MD_mem_parallel_loop_access,
5229 LLVMContext::MD_access_group});
5233 Slice(BaseOffset + PartOffset, BaseOffset + PartOffset + PartSize,
5237 <<
", " << NewSlices.
back().endOffset()
5238 <<
"): " << *PStore <<
"\n");
5248 PartOffset =
Offsets.Splits[Idx];
5250 PartSize = (Idx <
Size ?
Offsets.Splits[Idx] : StoreSize) - PartOffset;
5260 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
5261 ResplitPromotableAllocas.
insert(OtherAI);
5262 Worklist.insert(OtherAI);
5265 assert(OtherAI != &AI &&
"We can't re-split our own alloca!");
5266 Worklist.insert(OtherAI);
5281 DeadInsts.push_back(LI);
5283 DeadInsts.push_back(SI);
5292 AS.insert(NewSlices);
5296 for (
auto I = AS.begin(),
E = AS.end();
I !=
E; ++
I)
5302 PromotableAllocas.set_subtract(ResplitPromotableAllocas);
5338 bool IsIntegralPointerTy =
5339 EltTy->
isPointerTy() && !
DL.isNonIntegralPointerType(EltTy);
5341 !IsIntegralPointerTy)
5345 TypeSize StructSize =
DL.getStructLayout(STy)->getSizeInBytes();
5346 TypeSize VectorSize =
DL.getTypeAllocSize(VTy);
5347 if (StructSize != VectorSize)
5350 for (
const Slice &S :
P) {
5353 auto *U = S.getUse();
5357 User *Usr = U->getUser();
5379static std::tuple<Type *, bool, VectorType *>
5383 VectorType *SelectedVecTy,
bool SelectedIntWidening) {
5385 dbgs() <<
"selectPartitionType path=" << Path
5390 dbgs() <<
"<unnamed>";
5391 dbgs() <<
" partition=[" <<
P.beginOffset() <<
"," <<
P.endOffset()
5392 <<
") size=" <<
P.size();
5394 dbgs() <<
" alloc-size=" << AllocSize->getKnownMinValue();
5396 dbgs() <<
" chosen=" << *SelectedTy;
5398 dbgs() <<
" vec=" << *SelectedVecTy;
5399 dbgs() <<
" intwiden=" << SelectedIntWidening <<
"\n";
5417 if (VecTy && VecTy->getElementType()->isFloatingPointTy() &&
5418 VecTy->getElementCount().getFixedValue() > 1) {
5419 LogSelection(
"direct-fp-vecty", VecTy, VecTy,
false);
5420 return {VecTy,
false, VecTy};
5425 auto [CommonUseTy, LargestIntTy] =
5428 TypeSize CommonUseSize =
DL.getTypeAllocSize(CommonUseTy);
5434 LogSelection(
"common-type-vecty", VecTy, VecTy,
false);
5435 return {VecTy,
false, VecTy};
5438 LogSelection(
"common-type", CommonUseTy,
nullptr, IntWiden);
5439 return {CommonUseTy, IntWiden,
nullptr};
5446 P.beginOffset(),
P.size())) {
5450 if (TypePartitionTy->isArrayTy() &&
5451 TypePartitionTy->getArrayElementType()->isIntegerTy() &&
5452 DL.isLegalInteger(
P.size() * 8))
5456 LogSelection(
"type-partition-int-widen", TypePartitionTy,
nullptr,
true);
5457 return {TypePartitionTy,
true,
nullptr};
5460 LogSelection(
"type-partition-vecty", VecTy, VecTy,
false);
5461 return {VecTy,
false, VecTy};
5466 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >=
P.size() &&
5468 LogSelection(
"largest-int-int-widen", LargestIntTy,
nullptr,
true);
5469 return {LargestIntTy,
true,
nullptr};
5474 if (AggregateToVector) {
5477 LogSelection(
"struct-fallback-vecty", VTy,
nullptr,
false);
5478 return {VTy,
false,
nullptr};
5484 LogSelection(
"type-partition-fallback", TypePartitionTy,
nullptr,
false);
5485 return {TypePartitionTy,
false,
nullptr};
5490 DL.getTypeAllocSize(LargestIntTy).getFixedValue() >=
P.size()) {
5491 LogSelection(
"largest-int-fallback", LargestIntTy,
nullptr,
false);
5492 return {LargestIntTy,
false,
nullptr};
5496 if (
DL.isLegalInteger(
P.size() * 8)) {
5498 LogSelection(
"legal-int-fallback", IntTy,
nullptr,
false);
5499 return {IntTy,
false,
nullptr};
5504 LogSelection(
"byte-array-fallback", ArrayTy,
nullptr,
false);
5505 return {ArrayTy,
false,
nullptr};
5518std::pair<AllocaInst *, uint64_t>
5519SROA::rewritePartition(AllocaInst &AI, AllocaSlices &AS, Partition &
P) {
5520 const DataLayout &
DL = AI.getDataLayout();
5522 auto [PartitionTy, IsIntegerWideningViable, VecTy] =
5532 if (PartitionTy == AI.getAllocatedType() &&
P.beginOffset() == 0) {
5542 const bool IsUnconstrained = Alignment <=
DL.getABITypeAlign(PartitionTy);
5543 NewAI =
new AllocaInst(
5544 PartitionTy, AI.getAddressSpace(),
nullptr,
5545 IsUnconstrained ?
DL.getPrefTypeAlign(PartitionTy) : Alignment,
5546 AI.
getName() +
".sroa." + Twine(
P.begin() - AS.begin()),
5553 LLVM_DEBUG(
dbgs() <<
"Rewriting alloca partition " <<
"[" <<
P.beginOffset()
5554 <<
"," <<
P.endOffset() <<
") to: " << *NewAI <<
"\n");
5559 unsigned PPWOldSize = PostPromotionWorklist.size();
5560 unsigned NumUses = 0;
5561 SmallSetVector<PHINode *, 8> PHIUsers;
5562 SmallSetVector<SelectInst *, 8> SelectUsers;
5565 DL, AS, *
this, AI, *NewAI, PartitionTy,
P.beginOffset(),
P.endOffset(),
5566 IsIntegerWideningViable, VecTy, PHIUsers, SelectUsers);
5567 bool Promotable =
true;
5569 if (
auto DeletedValues =
Rewriter.rewriteTreeStructuredMerge(
P)) {
5570 NumUses += DeletedValues->
size() + 1;
5571 for (
Value *V : *DeletedValues)
5572 DeadInsts.push_back(V);
5574 for (Slice *S :
P.splitSliceTails()) {
5578 for (Slice &S :
P) {
5584 NumAllocaPartitionUses += NumUses;
5585 MaxUsesPerAllocaPartition.updateMax(NumUses);
5589 for (PHINode *
PHI : PHIUsers)
5593 SelectUsers.
clear();
5598 NewSelectsToRewrite;
5600 for (SelectInst *Sel : SelectUsers) {
5601 std::optional<RewriteableMemOps>
Ops =
5602 isSafeSelectToSpeculate(*Sel, PreserveCFG);
5606 SelectUsers.clear();
5607 NewSelectsToRewrite.
clear();
5614 for (Use *U : AS.getDeadUsesIfPromotable()) {
5616 Value::dropDroppableUse(*U);
5619 DeadInsts.push_back(OldInst);
5621 if (PHIUsers.empty() && SelectUsers.empty()) {
5623 PromotableAllocas.insert(NewAI);
5628 SpeculatablePHIs.insert_range(PHIUsers);
5629 SelectsToRewrite.reserve(SelectsToRewrite.size() +
5630 NewSelectsToRewrite.
size());
5632 std::make_move_iterator(NewSelectsToRewrite.
begin()),
5633 std::make_move_iterator(NewSelectsToRewrite.
end())))
5634 SelectsToRewrite.insert(std::move(KV));
5635 Worklist.insert(NewAI);
5639 while (PostPromotionWorklist.size() > PPWOldSize)
5640 PostPromotionWorklist.pop_back();
5645 return {
nullptr, 0};
5650 Worklist.insert(NewAI);
5653 return {NewAI,
DL.getTypeSizeInBits(PartitionTy).getFixedValue()};
5697 int64_t BitExtractOffset) {
5699 bool HasFragment =
false;
5700 bool HasBitExtract =
false;
5709 HasBitExtract =
true;
5710 int64_t ExtractOffsetInBits =
Op.getArg(0);
5711 int64_t ExtractSizeInBits =
Op.getArg(1);
5720 assert(BitExtractOffset <= 0);
5721 int64_t AdjustedOffset = ExtractOffsetInBits + BitExtractOffset;
5727 if (AdjustedOffset < 0)
5730 Ops.push_back(
Op.getOp());
5731 Ops.push_back(std::max<int64_t>(0, AdjustedOffset));
5732 Ops.push_back(ExtractSizeInBits);
5735 Op.appendToVector(
Ops);
5740 if (HasFragment && HasBitExtract)
5743 if (!HasBitExtract) {
5762 std::optional<DIExpression::FragmentInfo> NewFragment,
5763 int64_t BitExtractAdjustment) {
5773 BitExtractAdjustment);
5774 if (!NewFragmentExpr)
5780 BeforeInst->
getParent()->insertDbgRecordBefore(DVR,
5793 BeforeInst->
getParent()->insertDbgRecordBefore(DVR,
5799 if (!NewAddr->
hasMetadata(LLVMContext::MD_DIAssignID)) {
5807 LLVM_DEBUG(
dbgs() <<
"Created new DVRAssign: " << *NewAssign <<
"\n");
5813bool SROA::splitAlloca(AllocaInst &AI, AllocaSlices &AS) {
5814 if (AS.begin() == AS.end())
5817 unsigned NumPartitions = 0;
5819 const DataLayout &
DL = AI.getModule()->getDataLayout();
5822 Changed |= presplitLoadsAndStores(AI, AS);
5830 bool IsSorted =
true;
5832 uint64_t AllocaSize = AI.getAllocationSize(
DL)->getFixedValue();
5833 const uint64_t MaxBitVectorSize = 1024;
5834 if (AllocaSize <= MaxBitVectorSize) {
5837 SmallBitVector SplittableOffset(AllocaSize + 1,
true);
5839 for (
unsigned O = S.beginOffset() + 1;
5840 O < S.endOffset() && O < AllocaSize; O++)
5841 SplittableOffset.reset(O);
5843 for (Slice &S : AS) {
5844 if (!S.isSplittable())
5847 if ((S.beginOffset() > AllocaSize || SplittableOffset[S.beginOffset()]) &&
5848 (S.endOffset() > AllocaSize || SplittableOffset[S.endOffset()]))
5853 S.makeUnsplittable();
5860 for (Slice &S : AS) {
5861 if (!S.isSplittable())
5864 if (S.beginOffset() == 0 && S.endOffset() >= AllocaSize)
5869 S.makeUnsplittable();
5884 Fragment(AllocaInst *AI, uint64_t O, uint64_t S)
5890 for (
auto &
P : AS.partitions()) {
5891 auto [NewAI, ActiveBits] = rewritePartition(AI, AS, P);
5895 uint64_t SizeOfByte = 8;
5897 uint64_t Size = std::min(ActiveBits, P.size() * SizeOfByte);
5898 Fragments.push_back(
5899 Fragment(NewAI, P.beginOffset() * SizeOfByte, Size));
5905 NumAllocaPartitions += NumPartitions;
5906 MaxPartitionsPerAlloca.updateMax(NumPartitions);
5910 auto MigrateOne = [&](DbgVariableRecord *DbgVariable) {
5915 const Value *DbgPtr = DbgVariable->getAddress();
5917 DbgVariable->getFragmentOrEntireVariable();
5920 int64_t CurrentExprOffsetInBytes = 0;
5921 SmallVector<uint64_t> PostOffsetOps;
5923 ->extractLeadingOffset(CurrentExprOffsetInBytes, PostOffsetOps))
5927 int64_t ExtractOffsetInBits = 0;
5931 ExtractOffsetInBits =
Op.getArg(0);
5936 DIBuilder DIB(*AI.getModule(),
false);
5937 for (
auto Fragment : Fragments) {
5938 int64_t OffsetFromLocationInBits;
5939 std::optional<DIExpression::FragmentInfo> NewDbgFragment;
5944 DL, &AI, Fragment.Offset, Fragment.Size, DbgPtr,
5945 CurrentExprOffsetInBytes * 8, ExtractOffsetInBits, VarFrag,
5946 NewDbgFragment, OffsetFromLocationInBits))
5952 if (NewDbgFragment && !NewDbgFragment->SizeInBits)
5957 if (!NewDbgFragment)
5958 NewDbgFragment = DbgVariable->getFragment();
5962 int64_t OffestFromNewAllocaInBits =
5963 OffsetFromLocationInBits - ExtractOffsetInBits;
5966 int64_t BitExtractOffset =
5967 std::min<int64_t>(0, OffestFromNewAllocaInBits);
5972 OffestFromNewAllocaInBits =
5973 std::max(int64_t(0), OffestFromNewAllocaInBits);
5979 DIExpression *NewExpr = DIExpression::get(AI.getContext(), PostOffsetOps);
5980 if (OffestFromNewAllocaInBits > 0) {
5981 int64_t OffsetInBytes = (OffestFromNewAllocaInBits + 7) / 8;
5987 auto RemoveOne = [DbgVariable](
auto *OldDII) {
5988 auto SameVariableFragment = [](
const auto *
LHS,
const auto *
RHS) {
5989 return LHS->getVariable() ==
RHS->getVariable() &&
5990 LHS->getDebugLoc()->getInlinedAt() ==
5991 RHS->getDebugLoc()->getInlinedAt();
5993 if (SameVariableFragment(OldDII, DbgVariable))
5994 OldDII->eraseFromParent();
5999 NewDbgFragment, BitExtractOffset);
6013void SROA::clobberUse(Use &U) {
6023 DeadInsts.push_back(OldI);
6045bool SROA::propagateStoredValuesToLoads(AllocaInst &AI, AllocaSlices &AS) {
6050 LLVM_DEBUG(
dbgs() <<
"Attempting to propagate values on " << AI <<
"\n");
6051 bool AllSameAndValid =
true;
6052 Type *PartitionType =
nullptr;
6054 uint64_t BeginOffset = 0;
6055 uint64_t EndOffset = 0;
6057 auto Flush = [&]() {
6058 if (AllSameAndValid && !Insts.
empty()) {
6059 LLVM_DEBUG(
dbgs() <<
"Propagate values on slice [" << BeginOffset <<
", "
6060 << EndOffset <<
")\n");
6062 SSAUpdater
SSA(&NewPHIs);
6064 BasicLoadAndStorePromoter Promoter(Insts,
SSA, PartitionType);
6065 Promoter.run(Insts);
6067 AllSameAndValid =
true;
6068 PartitionType =
nullptr;
6072 for (Slice &S : AS) {
6076 dbgs() <<
"Ignoring slice: ";
6077 AS.print(
dbgs(), &S);
6081 if (S.beginOffset() >= EndOffset) {
6083 BeginOffset = S.beginOffset();
6084 EndOffset = S.endOffset();
6085 }
else if (S.beginOffset() != BeginOffset || S.endOffset() != EndOffset) {
6086 if (AllSameAndValid) {
6088 dbgs() <<
"Slice does not match range [" << BeginOffset <<
", "
6089 << EndOffset <<
")";
6090 AS.print(
dbgs(), &S);
6092 AllSameAndValid =
false;
6094 EndOffset = std::max(EndOffset, S.endOffset());
6101 if (!LI->
isSimple() || (PartitionType && UserTy != PartitionType))
6102 AllSameAndValid =
false;
6103 PartitionType = UserTy;
6106 Type *UserTy =
SI->getValueOperand()->getType();
6107 if (!
SI->isSimple() || (PartitionType && UserTy != PartitionType))
6108 AllSameAndValid =
false;
6109 PartitionType = UserTy;
6112 AllSameAndValid =
false;
6125std::pair<
bool ,
bool >
6126SROA::runOnAlloca(AllocaInst &AI) {
6128 bool CFGChanged =
false;
6131 ++NumAllocasAnalyzed;
6134 if (AI.use_empty()) {
6135 AI.eraseFromParent();
6139 const DataLayout &
DL = AI.getDataLayout();
6142 std::optional<TypeSize>
Size = AI.getAllocationSize(
DL);
6143 if (AI.isArrayAllocation() || !
Size ||
Size->isScalable() ||
Size->isZero())
6148 IRBuilderTy IRB(&AI);
6149 AggLoadStoreRewriter AggRewriter(
DL, IRB);
6150 Changed |= AggRewriter.rewrite(AI);
6153 AllocaSlices AS(
DL, AI);
6158 if (AS.isEscapedReadOnly()) {
6159 Changed |= propagateStoredValuesToLoads(AI, AS);
6164 for (Instruction *DeadUser : AS.getDeadUsers()) {
6166 for (Use &DeadOp : DeadUser->operands())
6173 DeadInsts.push_back(DeadUser);
6176 for (Use *DeadOp : AS.getDeadOperands()) {
6177 clobberUse(*DeadOp);
6182 if (AS.begin() == AS.end())
6185 Changed |= splitAlloca(AI, AS);
6188 while (!SpeculatablePHIs.empty())
6192 auto RemainingSelectsToRewrite = SelectsToRewrite.takeVector();
6193 while (!RemainingSelectsToRewrite.empty()) {
6194 const auto [
K,
V] = RemainingSelectsToRewrite.pop_back_val();
6211bool SROA::deleteDeadInstructions(
6212 SmallPtrSetImpl<AllocaInst *> &DeletedAllocas) {
6214 while (!DeadInsts.empty()) {
6224 DeletedAllocas.
insert(AI);
6226 OldDII->eraseFromParent();
6232 for (Use &Operand :
I->operands())
6237 DeadInsts.push_back(U);
6241 I->eraseFromParent();
6251bool SROA::promoteAllocas() {
6252 if (PromotableAllocas.empty())
6259 NumPromoted += PromotableAllocas.size();
6260 PromoteMemToReg(PromotableAllocas.getArrayRef(), DTU->getDomTree(), AC);
6263 PromotableAllocas.clear();
6267std::pair<
bool ,
bool > SROA::runSROA(Function &
F) {
6270 const DataLayout &
DL =
F.getDataLayout();
6275 std::optional<TypeSize>
Size = AI->getAllocationSize(
DL);
6277 PromotableAllocas.insert(AI);
6279 Worklist.insert(AI);
6284 bool CFGChanged =
false;
6287 SmallPtrSet<AllocaInst *, 4> DeletedAllocas;
6290 while (!Worklist.empty()) {
6291 auto [IterationChanged, IterationCFGChanged] =
6292 runOnAlloca(*Worklist.pop_back_val());
6294 CFGChanged |= IterationCFGChanged;
6296 Changed |= deleteDeadInstructions(DeletedAllocas);
6300 if (!DeletedAllocas.
empty()) {
6301 Worklist.set_subtract(DeletedAllocas);
6302 PostPromotionWorklist.set_subtract(DeletedAllocas);
6303 PromotableAllocas.set_subtract(DeletedAllocas);
6304 DeletedAllocas.
clear();
6310 Worklist = PostPromotionWorklist;
6311 PostPromotionWorklist.clear();
6312 }
while (!Worklist.empty());
6314 assert((!CFGChanged ||
Changed) &&
"Can not only modify the CFG.");
6315 assert((!CFGChanged || !PreserveCFG) &&
6316 "Should not have modified the CFG when told to preserve it.");
6319 for (
auto &BB :
F) {
6332 SROA(&
F.getContext(), &DTU, &AC, Options).runSROA(
F);
6345 OS, MapClassName2PassName);
6349 if (Options.AggregateToVector)
6350 OS <<
";aggregate-to-vector";
6371 if (skipFunction(
F))
6374 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
6376 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
6382 void getAnalysisUsage(AnalysisUsage &AU)
const override {
6389 StringRef getPassName()
const override {
return "SROA"; }
6394char SROALegacyPass::ID = 0;
6399 AggregateToVector));
6403 "Scalar Replacement Of Aggregates",
false,
false)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
This file implements a class to represent arbitrary precision integral constant values and operations...
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
This file contains the declarations for the subclasses of Constant, which represent the different fla...
DXIL Forward Handle Accesses
This file defines the DenseMap class.
static bool runOnFunction(Function &F, bool PostInlining)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
This defines the Use class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
print mir2vec MIR2Vec Vocabulary Printer Pass
This file implements a map that provides insertion order iteration.
static std::optional< AllocFnsTy > getAllocationSize(const CallBase *CB, const TargetLibraryInfo *TLI)
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
PassBuilder PB(Machine, PassOpts->PTO, std::nullopt, &PIC)
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
This file provides a collection of visitors which walk the (instruction) uses of a pointer.
const SmallVectorImpl< MachineOperand > & Cond
Remove Loads Into Fake Uses
static unsigned getNumElements(Type *Ty)
bool isDead(const MachineInstr &MI, const MachineRegisterInfo &MRI)
static void visit(BasicBlock &Start, std::function< bool(BasicBlock *)> op)
static void migrateDebugInfo(AllocaInst *OldAlloca, bool IsSplit, uint64_t OldAllocaOffsetInBits, uint64_t SliceSizeInBits, Instruction *OldInst, Instruction *Inst, Value *Dest, Value *Value, const DataLayout &DL)
Find linked dbg.assign and generate a new one with the correct FragmentInfo.
static VectorType * isVectorPromotionViable(Partition &P, const DataLayout &DL, unsigned VScale)
Test whether the given alloca partitioning and range of slices can be promoted to a vector.
static Align getAdjustedAlignment(Instruction *I, uint64_t Offset)
Compute the adjusted alignment for a load or store from an offset.
static VectorType * checkVectorTypesForPromotion(Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool HaveCommonEltTy, Type *CommonEltTy, bool HaveVecPtrTy, bool HaveCommonVecPtrTy, VectorType *CommonVecPtrTy, unsigned VScale)
Test whether any vector type in CandidateTys is viable for promotion.
static std::pair< Type *, IntegerType * > findCommonType(AllocaSlices::const_iterator B, AllocaSlices::const_iterator E, uint64_t EndOffset)
Walk the range of a partitioning looking for a common type to cover this sequence of slices.
static Type * stripAggregateTypeWrapping(const DataLayout &DL, Type *Ty)
Strip aggregate type wrapping.
static FragCalcResult calculateFragment(DILocalVariable *Variable, uint64_t NewStorageSliceOffsetInBits, uint64_t NewStorageSliceSizeInBits, std::optional< DIExpression::FragmentInfo > StorageFragment, std::optional< DIExpression::FragmentInfo > CurrentFragment, DIExpression::FragmentInfo &Target)
static DIExpression * createOrReplaceFragment(const DIExpression *Expr, DIExpression::FragmentInfo Frag, int64_t BitExtractOffset)
Create or replace an existing fragment in a DIExpression with Frag.
static Value * insertInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *Old, Value *V, uint64_t Offset, const Twine &Name)
static bool isVectorPromotionViableForSlice(Partition &P, const Slice &S, VectorType *Ty, uint64_t ElementSize, const DataLayout &DL, unsigned VScale)
Test whether the given slice use can be promoted to a vector.
static Value * getAdjustedPtr(IRBuilderTy &IRB, const DataLayout &DL, Value *Ptr, APInt Offset, Type *PointerTy, const Twine &NamePrefix)
Compute an adjusted pointer from Ptr by Offset bytes where the resulting pointer has PointerTy.
static bool isIntegerWideningViableForSlice(const Slice &S, uint64_t AllocBeginOffset, Type *AllocaTy, const DataLayout &DL, bool &WholeAllocaOp)
Test whether a slice of an alloca is valid for integer widening.
static Value * extractVector(IRBuilderTy &IRB, Value *V, unsigned BeginIndex, unsigned EndIndex, const Twine &Name)
static Value * foldPHINodeOrSelectInst(Instruction &I)
A helper that folds a PHI node or a select.
static bool rewriteSelectInstMemOps(SelectInst &SI, const RewriteableMemOps &Ops, IRBuilderTy &IRB, DomTreeUpdater *DTU)
static void rewriteMemOpOfSelect(SelectInst &SI, T &I, SelectHandSpeculativity Spec, DomTreeUpdater &DTU)
static Value * foldSelectInst(SelectInst &SI)
bool isKillAddress(const DbgVariableRecord *DVR)
static Value * insertVector(IRBuilderTy &IRB, Value *Old, Value *V, unsigned BeginIndex, const Twine &Name)
static bool isIntegerWideningViable(Partition &P, Type *AllocaTy, const DataLayout &DL)
Test whether the given alloca partition's integer operations can be widened to promotable ones.
static void speculatePHINodeLoads(IRBuilderTy &IRB, PHINode &PN)
static VectorType * createAndCheckVectorTypesForPromotion(SetVector< Type * > &OtherTys, ArrayRef< VectorType * > CandidateTysCopy, function_ref< void(Type *)> CheckCandidateType, Partition &P, const DataLayout &DL, SmallVectorImpl< VectorType * > &CandidateTys, bool &HaveCommonEltTy, Type *&CommonEltTy, bool &HaveVecPtrTy, bool &HaveCommonVecPtrTy, VectorType *&CommonVecPtrTy, unsigned VScale)
static DebugVariable getAggregateVariable(DbgVariableRecord *DVR)
static std::tuple< Type *, bool, VectorType * > selectPartitionType(Partition &P, const DataLayout &DL, AllocaInst &AI, LLVMContext &C, bool AggregateToVector)
Select a partition type for an alloca partition.
static bool isSafePHIToSpeculate(PHINode &PN)
PHI instructions that use an alloca and are subsequently loaded can be rewritten to load both input p...
static FixedVectorType * tryCanonicalizeStructToVector(StructType *STy, Partition &P, const DataLayout &DL)
Try to canonicalize a homogeneous struct partition to a vector type.
static Value * extractInteger(const DataLayout &DL, IRBuilderTy &IRB, Value *V, IntegerType *Ty, uint64_t Offset, const Twine &Name)
static void insertNewDbgInst(DIBuilder &DIB, DbgVariableRecord *Orig, AllocaInst *NewAddr, DIExpression *NewAddrExpr, Instruction *BeforeInst, std::optional< DIExpression::FragmentInfo > NewFragment, int64_t BitExtractAdjustment)
Insert a new DbgRecord.
static void speculateSelectInstLoads(SelectInst &SI, LoadInst &LI, IRBuilderTy &IRB)
static Value * mergeTwoVectors(Value *V0, Value *V1, const DataLayout &DL, Type *NewAIEltTy, IRBuilder<> &Builder)
This function takes two vector values and combines them into a single vector by concatenating their e...
const DIExpression * getAddressExpression(const DbgVariableRecord *DVR)
static Type * getTypePartition(const DataLayout &DL, Type *Ty, uint64_t Offset, uint64_t Size)
Try to find a partition of the aggregate type passed in for a given offset and size.
static bool canConvertValue(const DataLayout &DL, Type *OldTy, Type *NewTy, unsigned VScale=0)
Test whether we can convert a value from the old to the new type.
static SelectHandSpeculativity isSafeLoadOfSelectToSpeculate(LoadInst &LI, SelectInst &SI, bool PreserveCFG)
This file provides the interface for LLVM's Scalar Replacement of Aggregates pass.
This file implements a set that has insertion order iteration characteristics.
This file implements the SmallBitVector class.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static SymbolRef::Type getType(const Symbol *Sym)
static unsigned getBitWidth(Type *Ty, const DataLayout &DL)
Returns the bitwidth of the given scalar or pointer type.
Virtual Register Rewriter
Builder for the alloca slices.
SliceBuilder(const DataLayout &DL, AllocaInst &AI, AllocaSlices &AS)
An iterator over partitions of the alloca's slices.
bool operator==(const partition_iterator &RHS) const
friend class AllocaSlices
partition_iterator & operator++()
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
PointerType * getType() const
Overload to return most specific pointer type.
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
Get the array size.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
A function analysis which provides an AssumptionCache.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
InstListType::iterator iterator
Instruction iterators...
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction; assumes that the block is well-formed.
Represents analyses that only rely on functions' control flow.
LLVM_ABI CaptureInfo getCaptureInfo(unsigned OpNo) const
Return which pointer components this operand may capture.
bool onlyReadsMemory(unsigned OpNo) const
bool isDataOperand(const Use *U) const
This is the shared class of boolean and integer constants.
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static DIAssignID * getDistinct(LLVMContext &Context)
LLVM_ABI DbgInstPtr insertDbgAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *SrcVar, DIExpression *ValExpr, Value *Addr, DIExpression *AddrExpr, const DILocation *DL)
Insert a new llvm.dbg.assign intrinsic call.
iterator_range< expr_op_iterator > expr_ops() const
DbgVariableFragmentInfo FragmentInfo
LLVM_ABI bool startsWithDeref() const
Return whether the first element is a DW_OP_deref.
static LLVM_ABI bool calculateFragmentIntersect(const DataLayout &DL, const Value *SliceStart, uint64_t SliceOffsetInBits, uint64_t SliceSizeInBits, const Value *DbgPtr, int64_t DbgPtrOffsetInBits, int64_t DbgExtractOffsetInBits, DIExpression::FragmentInfo VarFrag, std::optional< DIExpression::FragmentInfo > &Result, int64_t &OffsetFromLocationInBits)
Computes a fragment, bit-extract operation if needed, and new constant offset to describe a part of a...
static LLVM_ABI std::optional< DIExpression * > createFragmentExpression(const DIExpression *Expr, unsigned OffsetInBits, unsigned SizeInBits)
Create a DIExpression to describe one part of an aggregate variable that is fragmented across multipl...
static LLVM_ABI DIExpression * prepend(const DIExpression *Expr, uint8_t Flags, int64_t Offset=0)
Prepend DIExpr with a deref and offset operation and optionally turn it into a stack value or/and an ...
A parsed version of the target data layout string, and methods for querying it.
LLVM_ABI void moveBefore(DbgRecord *MoveBefore)
DebugLoc getDebugLoc() const
void setDebugLoc(DebugLoc Loc)
Record of a variable value-assignment, aka a non instruction representation of the dbg....
LLVM_ABI void setKillAddress()
Kill the address component.
LLVM_ABI bool isKillLocation() const
LocationType getType() const
LLVM_ABI bool isKillAddress() const
Check whether this kills the address component.
LLVM_ABI void replaceVariableLocationOp(Value *OldValue, Value *NewValue, bool AllowEmpty=false)
Value * getValue(unsigned OpIdx=0) const
static LLVM_ABI DbgVariableRecord * createLinkedDVRAssign(Instruction *LinkedInstr, Value *Val, DILocalVariable *Variable, DIExpression *Expression, Value *Address, DIExpression *AddressExpression, const DILocation *DI)
LLVM_ABI void setAssignId(DIAssignID *New)
DIExpression * getExpression() const
static LLVM_ABI DbgVariableRecord * createDVRDeclare(Value *Address, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
static LLVM_ABI DbgVariableRecord * createDbgVariableRecord(Value *Location, DILocalVariable *DV, DIExpression *Expr, const DILocation *DI)
DILocalVariable * getVariable() const
LLVM_ABI void setKillLocation()
bool isDbgDeclare() const
void setAddress(Value *V)
DIExpression * getAddressExpression() const
LLVM_ABI DILocation * getInlinedAt() const
Identifies a unique instance of a variable.
ValueT lookup(const_arg_type_t< KeyT > Val) const
Return the entry for the specified key, or a default constructed value if no such entry exists.
iterator find(const_arg_type_t< KeyT > Val)
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Class to represent fixed width SIMD vectors.
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
unsigned getVScaleValue() const
Return the value for vscale based on the vscale_range attribute or 0 when unknown.
const BasicBlock & getEntryBlock() const
LLVM_ABI bool accumulateConstantOffset(const DataLayout &DL, APInt &Offset, function_ref< bool(Value &, APInt &)> ExternalAnalysis=nullptr) const
Accumulate the constant address offset of this GEP if possible.
Value * getPointerOperand()
iterator_range< op_iterator > indices()
Type * getSourceElementType() const
LLVM_ABI GEPNoWrapFlags getNoWrapFlags() const
Get the nowrap flags for the GEP instruction.
This provides the default implementation of the IRBuilder 'InsertHelper' method that is called whenev...
virtual void InsertHelper(Instruction *I, const Twine &Name, BasicBlock::iterator InsertPt) const
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
LLVM_ABI unsigned getNumSuccessors() const LLVM_READONLY
Return the number of successors that this instruction has.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
LLVM_ABI bool isAtomic() const LLVM_READONLY
Return true if this instruction has an AtomicOrdering of unordered or higher.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Instruction * user_back()
Specialize the methods defined in Value, as we know that an instruction can only be used by other ins...
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
LLVM_ABI bool mayHaveSideEffects() const LLVM_READONLY
Return true if the instruction may have side effects.
LLVM_ABI bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
@ MAX_INT_BITS
Maximum number of bits that can be specified.
unsigned getBitWidth() const
Get the number of bits in this IntegerType.
A wrapper class for inspecting calls to intrinsic functions.
This is an important class for using LLVM in a threaded context.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Type * getPointerOperandType() const
static unsigned getPointerOperandIndex()
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
LLVMContext & getContext() const
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
This is the common base class for memset/memcpy/memmove.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
op_range incoming_values()
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
unsigned getNumIncomingValues() const
Return the number of incoming edges.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
PointerIntPair - This class implements a pair of a pointer and small integer.
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
PtrUseVisitor(const DataLayout &DL)
LLVM_ABI SROAPass(SROAOptions Options)
If PreserveCFG is set, then the pass is not allowed to modify CFG in any way, even if it would update...
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Run the pass over the function.
LLVM_ABI void printPipeline(raw_ostream &OS, function_ref< StringRef(StringRef)> MapClassName2PassName)
Helper class for SSA formation on a set of values defined in multiple blocks.
This class represents the LLVM 'select' instruction.
A vector that has set insertion semantics.
size_type size() const
Determine the number of elements in the SetVector.
void clear()
Completely clear the SetVector.
bool insert(const value_type &X)
Insert a new element into the SetVector.
bool erase(PtrType Ptr)
Remove pointer from the set.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void reserve(size_type N)
iterator erase(const_iterator CI)
typename SuperClass::const_iterator const_iterator
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setAlignment(Align Align)
Value * getValueOperand()
static unsigned getPointerOperandIndex()
Value * getPointerOperand()
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Represent a constant reference to a string, i.e.
static constexpr size_t npos
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
size_t rfind(char C, size_t From=npos) const
Search for the last character C in the string.
size_t find(char C, size_t From=0) const
Search for the first character C in the string.
LLVM_ABI size_t find_first_not_of(char C, size_t From=0) const
Find the first character in the string that is not C or npos if not found.
Used to lazily calculate structure layout information for a target machine, based on the DataLayout s...
TypeSize getSizeInBytes() const
LLVM_ABI unsigned getElementContainingOffset(uint64_t FixedOffset) const
Given a valid byte offset into the structure, returns the structure index that contains it.
TypeSize getElementOffset(unsigned Idx) const
TypeSize getSizeInBits() const
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
element_iterator element_end() const
ArrayRef< Type * > elements() const
element_iterator element_begin() const
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
Type::subtype_iterator element_iterator
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
LLVM_ABI unsigned getIntegerBitWidth() const
bool isPointerTy() const
True if this is an instance of PointerType.
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
bool isSingleValueType() const
Return true if the type is a valid type for a register in codegen.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
bool isStructTy() const
True if this is an instance of StructType.
bool isTargetExtTy() const
Return true if this is a target extension type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
bool isIntegerTy() const
True if this is an instance of IntegerType.
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static LLVM_ABI UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
A Use represents the edge between a Value definition and its users.
const Use & getOperandUse(unsigned i) const
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
user_iterator user_begin()
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
LLVM_ABI const Value * stripInBoundsOffsets(function_ref< void(const Value *)> Func=[](const Value *) {}) const
Strip off pointer casts and inbounds GEPs.
iterator_range< user_iterator > users()
LLVM_ABI void dropDroppableUsesIn(User &Usr)
Remove every use of this value in User that can safely be removed.
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
iterator_range< use_iterator > uses()
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
static VectorType * getWithSizeAndScalar(VectorType *SizeTy, Type *EltTy)
This static method attempts to construct a VectorType with the same size-in-bits as SizeTy but with a...
static LLVM_ABI bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
constexpr bool isFixed() const
Returns true if the quantity is not scaled by vscale.
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
self_iterator getIterator()
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
CRTP base class which implements the entire standard iterator facade in terms of a minimal subset of ...
A range adaptor for a pair of iterators.
This class implements an extremely fast bulk output stream that can only output to a stream.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char IsVolatile[]
Key for Kernel::Arg::Metadata::mIsVolatile.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
SmallVector< DbgVariableRecord * > getDVRAssignmentMarkers(const Instruction *Inst)
Return a range of dbg_assign records for which Inst performs the assignment they encode.
LLVM_ABI void deleteAssignmentMarkers(const Instruction *Inst)
Delete the llvm.dbg.assign intrinsics linked to Inst.
initializer< Ty > init(const Ty &Val)
@ DW_OP_LLVM_extract_bits_zext
Only used in LLVM metadata.
@ DW_OP_LLVM_fragment
Only used in LLVM metadata.
@ DW_OP_LLVM_extract_bits_sext
Only used in LLVM metadata.
@ User
could "use" a pointer
NodeAddr< PhiNode * > Phi
NodeAddr< UseNode * > Use
Context & getContext() const
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
static cl::opt< bool > SROASkipMem2Reg("sroa-skip-mem2reg", cl::init(false), cl::Hidden)
Disable running mem2reg during SROA in order to test or debug SROA.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool operator<(int64_t V1, const APSInt &V2)
FunctionAddr VTableAddr Value
void stable_sort(R &&Range)
LLVM_ABI bool RemoveRedundantDbgInstrs(BasicBlock *BB)
Try to remove redundant dbg.value instructions from given basic block.
LLVM_ABI cl::opt< bool > ProfcheckDisableMetadataFixes
UnaryFunction for_each(R &&Range, UnaryFunction F)
Provide wrappers to std::for_each which take ranges instead of having to pass begin/end explicitly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
LLVM_ABI void PromoteMemToReg(ArrayRef< AllocaInst * > Allocas, DominatorTree &DT, AssumptionCache *AC=nullptr)
Promote the specified list of alloca instructions into scalar registers, inserting PHI nodes as appro...
LLVM_ABI bool isAssumeLikeIntrinsic(const Instruction *I)
Return true if it is an intrinsic that cannot be speculated but also cannot trap.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
auto successors(const MachineBasicBlock *BB)
bool operator!=(uint64_t V1, const APInt &V2)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
LLVM_ABI std::optional< RegOrConstant > getVectorSplat(const MachineInstr &MI, const MachineRegisterInfo &MRI)
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Align getLoadStoreAlignment(const Value *I)
A helper function that returns the alignment of load or store instruction.
auto unique(Range &&R, Predicate P)
bool operator==(const AddressRangeValuePair &LHS, const AddressRangeValuePair &RHS)
LLVM_ABI bool isAllocaPromotable(const AllocaInst *AI)
Return true if this alloca is legal for promotion.
auto dyn_cast_or_null(const Y &Val)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
LLVM_ABI bool isInstructionTriviallyDead(Instruction *I, const TargetLibraryInfo *TLI=nullptr)
Return true if the result produced by the instruction is not used, and the instruction will return.
bool capturesFullProvenance(CaptureComponents CC)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI void SplitBlockAndInsertIfThenElse(Value *Cond, BasicBlock::iterator SplitBefore, Instruction **ThenTerm, Instruction **ElseTerm, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr)
SplitBlockAndInsertIfThenElse is similar to SplitBlockAndInsertIfThen, but also creates the ElseBlock...
LLVM_ABI bool isSafeToLoadUnconditionally(Value *V, Align Alignment, const APInt &Size, const DataLayout &DL, Instruction *ScanFrom, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Return true if we know that executing a load from this value cannot trap.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
LLVM_ABI void initializeSROALegacyPassPass(PassRegistry &)
SmallVector< ValueTypeFromRangeType< R >, Size > to_vector(R &&Range)
Given a range of type R, iterate the entire range and return a SmallVector with elements of the vecto...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRValues(Value *V)
As above, for DVRValues.
LLVM_ABI void llvm_unreachable_internal(const char *msg=nullptr, const char *file=nullptr, unsigned line=0)
This function calls abort(), and prints the optional message to stderr.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
constexpr int PoisonMaskElem
iterator_range(Container &&) -> iterator_range< llvm::detail::IterOfRange< Container > >
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool isAssignmentTrackingEnabled(const Module &M)
Return true if assignment tracking is enabled for module M.
FunctionAddr VTableAddr Next
DWARFExpression::Operation Op
LLVM_ABI FunctionPass * createSROAPass(bool PreserveCFG=true, bool AggregateToVector=false)
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
LLVM_ABI TinyPtrVector< DbgVariableRecord * > findDVRDeclares(Value *V)
Finds dbg.declare records declaring local variables as living in the memory that 'V' points to.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool all_equal(std::initializer_list< T > Values)
Returns true if all Values in the initializer list are equal or the list is empty.
LLVM_ABI Instruction * SplitBlockAndInsertIfThen(Value *Cond, BasicBlock::iterator SplitBefore, bool Unreachable, MDNode *BranchWeights=nullptr, DomTreeUpdater *DTU=nullptr, LoopInfo *LI=nullptr, BasicBlock *ThenBlock=nullptr)
Split the containing block at the specified instruction - everything before SplitBefore stays in the ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI llvm::SmallVector< int, 16 > createSequentialMask(unsigned Start, unsigned NumInts, unsigned NumUndefs)
Create a sequential shuffle mask.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
AAMDNodes shift(size_t Offset) const
Create a new AAMDNode that describes this AAMDNode after applying a constant offset to the start of t...
LLVM_ABI AAMDNodes adjustForAccess(unsigned AccessSize)
Create a new AAMDNode for accessing AccessSize bytes of this AAMDNode.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Describes an element of a Bitfield.
static Bitfield::Type get(StorageType Packed)
Unpacks the field from the Packed value.
static void set(StorageType &Packed, typename Bitfield::Type Value)
Sets the typed value in the provided Packed value.
A CRTP mix-in to automatically provide informational APIs needed for passes.