65#define DEBUG_TYPE "memcpyopt"
68 "enable-memcpyopt-without-libcalls",
cl::Hidden,
69 cl::desc(
"Enable memcpyopt even when libcalls are disabled"));
71STATISTIC(NumMemCpyInstr,
"Number of memcpy instructions deleted");
72STATISTIC(NumMemMoveInstr,
"Number of memmove instructions deleted");
73STATISTIC(NumMemSetInfer,
"Number of memsets inferred");
74STATISTIC(NumMoveToCpy,
"Number of memmoves converted to memcpy");
75STATISTIC(NumCpyToSet,
"Number of memcpys converted to memset");
76STATISTIC(NumCallSlot,
"Number of call slot optimizations performed");
77STATISTIC(NumStackMove,
"Number of stack-move optimizations performed");
106 bool isProfitableToUseMemset(
const DataLayout &
DL)
const;
114bool MemsetRange::isProfitableToUseMemset(
const DataLayout &
DL)
const {
116 if (TheStores.
size() >= 4 || End - Start >= 16)
120 if (TheStores.
size() < 2)
125 for (Instruction *SI : TheStores)
131 if (TheStores.size() == 2)
144 unsigned Bytes = unsigned(End - Start);
145 unsigned MaxIntSize =
DL.getLargestLegalIntTypeSizeInBits() / 8;
148 unsigned NumPointerStores = Bytes / MaxIntSize;
151 unsigned NumByteStores = Bytes % MaxIntSize;
156 return TheStores.size() > NumPointerStores + NumByteStores;
167 const DataLayout &
DL;
170 MemsetRanges(
const DataLayout &
DL) :
DL(
DL) {}
174 const_iterator
begin()
const {
return Ranges.begin(); }
175 const_iterator
end()
const {
return Ranges.end(); }
178 void addInst(int64_t OffsetFromFirst, Instruction *Inst) {
180 addStore(OffsetFromFirst, SI);
185 void addStore(int64_t OffsetFromFirst, StoreInst *SI) {
186 TypeSize StoreSize =
DL.getTypeStoreSize(
SI->getOperand(0)->getType());
189 SI->getPointerOperand(),
SI->getAlign(), SI);
192 void addMemSet(int64_t OffsetFromFirst, MemSetInst *MSI) {
206void MemsetRanges::addRange(int64_t Start, int64_t
Size,
Value *Ptr,
207 MaybeAlign Alignment, Instruction *Inst) {
211 Ranges, [=](
const MemsetRange &O) {
return O.End <
Start; });
216 if (
I ==
Ranges.end() || End < I->Start) {
217 MemsetRange &
R = *
Ranges.insert(
I, MemsetRange());
221 R.Alignment = Alignment;
222 R.TheStores.push_back(Inst);
227 I->TheStores.push_back(Inst);
231 if (
I->Start <= Start &&
I->End >= End)
240 if (Start < I->Start) {
243 I->Alignment = Alignment;
251 range_iterator NextI =
I;
252 while (++NextI !=
Ranges.end() && End >= NextI->Start) {
254 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
255 if (NextI->End >
I->End)
271 assert(Start->getParent() == End->
getParent() &&
"Must be in same block");
273 if (Start->getFunction()->doesNotThrow())
278 bool RequiresNoCaptureBeforeUnwind;
280 RequiresNoCaptureBeforeUnwind) &&
281 !RequiresNoCaptureBeforeUnwind)
289void MemCpyOptPass::eraseInstruction(Instruction *
I) {
290 MSSAU->removeMemoryAccess(
I);
291 EEA->removeInstruction(
I);
292 I->eraseFromParent();
303 assert(Start->getBlock() == End->
getBlock() &&
"Only local supported");
309 if (
II &&
II->getIntrinsicID() == Intrinsic::lifetime_start &&
310 SkippedLifetimeStart && !*SkippedLifetimeStart) {
311 *SkippedLifetimeStart =
I;
330 return Start->getBlock() != End->
getBlock() ||
334 if (isa<MemoryUse>(&Acc))
336 Instruction *AccInst =
337 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
338 return isModSet(AA.getModRefInfo(AccInst, Loc));
352Instruction *MemCpyOptPass::tryMergingIntoMemset(Instruction *StartInst,
359 if (
DL.getTypeStoreSize(
SI->getOperand(0)->getType()).isScalable())
373 MemoryUseOrDef *MemInsertPoint =
nullptr;
374 for (++BI; !BI->isTerminator(); ++BI) {
378 MemInsertPoint = CurrentAcc;
383 if (CB->onlyAccessesInaccessibleMemory())
391 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
398 if (!NextStore->isSimple())
401 Value *StoredVal = NextStore->getValueOperand();
409 if (
DL.getTypeStoreSize(StoredVal->
getType()).isScalable())
418 if (ByteVal != StoredByte)
422 std::optional<int64_t>
Offset =
423 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr,
DL);
431 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
436 std::optional<int64_t>
Offset =
437 MSI->getDest()->getPointerOffsetFrom(StartPtr,
DL);
453 Ranges.addInst(0, StartInst);
463 for (
const MemsetRange &
Range : Ranges) {
464 if (
Range.TheStores.size() == 1)
468 if (!
Range.isProfitableToUseMemset(
DL))
473 StartPtr =
Range.StartPtr;
475 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal,
Range.End -
Range.Start,
482 dbgs() <<
"With: " << *AMemSet <<
'\n');
483 if (!
Range.TheStores.empty())
488 ? MSSAU->createMemoryAccessBefore(AMemSet,
nullptr, MemInsertPoint)
489 : MSSAU->createMemoryAccessAfter(AMemSet,
nullptr, MemInsertPoint));
490 MSSAU->insertDef(NewDef,
true);
491 MemInsertPoint = NewDef;
494 for (Instruction *SI :
Range.TheStores)
507bool MemCpyOptPass::moveUp(StoreInst *SI, Instruction *
P,
const LoadInst *LI) {
515 DenseSet<Instruction *>
Args;
516 auto AddArg = [&](
Value *Arg) {
518 if (
I &&
I->getParent() ==
SI->getParent()) {
526 if (!AddArg(
SI->getPointerOperand()))
530 SmallVector<Instruction *, 8> ToLift{
SI};
540 for (
auto I = --
SI->getIterator(),
E =
P->getIterator();
I !=
E; --
I) {
548 bool MayAlias =
isModOrRefSet(AA->getModRefInfo(
C, std::nullopt));
550 bool NeedLift =
false;
570 if (
isModSet(AA->getModRefInfo(
C, LoadLoc)))
603 MemoryUseOrDef *MemInsertPoint =
nullptr;
604 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(
P)) {
610 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(&
I)) {
620 I->moveBefore(
P->getIterator());
621 assert(MemInsertPoint &&
"Must have found insert point");
622 if (MemoryUseOrDef *MA = MSSA->getMemoryAccess(
I)) {
623 MSSAU->moveAfter(MA, MemInsertPoint);
631bool MemCpyOptPass::processStoreOfLoad(StoreInst *SI, LoadInst *LI,
632 const DataLayout &
DL,
637 BatchAAResults BAA(*AA, EEA);
643 if (
T->isAggregateType() &&
645 (TLI->has(LibFunc_memcpy) && TLI->has(LibFunc_memmove)))) {
655 if (
isModSet(BAA.getModRefInfo(&
I, LoadLoc))) {
665 if (
P == SI || moveUp(SI,
P, LI)) {
670 bool UseMemMove =
false;
671 if (
isModSet(AA->getModRefInfo(SI, LoadLoc)))
676 Builder.CreateTypeSize(Builder.getInt64Ty(),
DL.getTypeStoreSize(
T));
679 M = Builder.CreateMemMove(
SI->getPointerOperand(),
SI->getAlign(),
683 M = Builder.CreateMemCpy(
SI->getPointerOperand(),
SI->getAlign(),
685 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
687 LLVM_DEBUG(
dbgs() <<
"Promoting " << *LI <<
" to " << *SI <<
" => " << *M
691 auto *NewAccess = MSSAU->createMemoryAccessAfter(M,
nullptr, LastDef);
699 BBI =
M->getIterator();
707 auto GetCall = [&]() -> CallInst * {
711 MSSA->getWalker()->getClobberingMemoryAccess(LI, BAA)))
716 bool Changed = performCallSlotOptzn(
717 LI, SI,
SI->getPointerOperand()->stripPointerCasts(),
719 DL.getTypeStoreSize(
SI->getOperand(0)->getType()),
720 std::min(
SI->getAlign(), LI->
getAlign()), BAA, GetCall);
731 if (performStackMoveOptzn(LI, SI,
SI->getPointerOperand(),
735 BBI =
SI->getNextNode()->getIterator();
755 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
758 const DataLayout &
DL =
SI->getDataLayout();
760 Value *StoredVal =
SI->getValueOperand();
769 return processStoreOfLoad(SI, LI,
DL, BBI);
790 tryMergingIntoMemset(SI,
SI->getPointerOperand(), ByteVal)) {
791 BBI =
I->getIterator();
798 auto *
T =
V->getType();
799 if (!
T->isAggregateType())
802 TypeSize
Size =
DL.getTypeStoreSize(
T);
803 if (
Size.isScalable())
807 auto *
M = Builder.CreateMemSet(
SI->getPointerOperand(), ByteVal,
Size,
809 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
811 LLVM_DEBUG(
dbgs() <<
"Promoting " << *SI <<
" to " << *M <<
"\n");
816 auto *NewAccess = MSSAU->createMemoryAccessBefore(M,
nullptr, StoreDef);
823 BBI =
M->getIterator();
833 BBI =
I->getIterator();
842bool MemCpyOptPass::performCallSlotOptzn(Instruction *cpyLoad,
843 Instruction *cpyStore,
Value *cpyDest,
844 Value *cpySrc, TypeSize cpySize,
847 std::function<CallInst *()> GetC) {
873 std::optional<TypeSize> SrcAllocaSize = srcAlloca->getAllocationSize(
DL);
874 if (!SrcAllocaSize || SrcAllocaSize->isScalable())
876 uint64_t srcSize = SrcAllocaSize->getFixedValue();
878 if (cpySize < srcSize)
881 CallInst *
C = GetC();
886 if (Function *
F =
C->getCalledFunction())
887 if (
F->isIntrinsic() &&
F->getIntrinsicID() == Intrinsic::lifetime_start)
890 if (
C->getParent() != cpyStore->
getParent()) {
895 MemoryLocation DestLoc =
898 : MemoryLocation::getForDest(
cast<MemCpyInst>(cpyStore));
904 MSSA->getMemoryAccess(cpyStore), &SkippedLifetimeStart)) {
905 LLVM_DEBUG(dbgs() <<
"Call Slot: Dest pointer modified after call\n");
912 if (SkippedLifetimeStart) {
915 if (LifetimeArg && LifetimeArg->getParent() ==
C->getParent() &&
916 C->comesBefore(LifetimeArg))
922 bool ExplicitlyDereferenceableOnly;
924 ExplicitlyDereferenceableOnly) ||
927 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest pointer not dereferenceable\n");
946 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest may be visible through unwinding\n");
951 Align srcAlign = srcAlloca->getAlign();
952 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
956 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest not sufficiently aligned\n");
965 while (!srcUseList.empty()) {
966 User *
U = srcUseList.pop_back_val();
975 if (U !=
C && U != cpyLoad) {
976 LLVM_DEBUG(
dbgs() <<
"Call slot: Source accessed by " << *U <<
"\n");
983 bool SrcIsCaptured =
any_of(
C->args(), [&](Use &U) {
984 return U->stripPointerCasts() == cpySrc &&
985 !C->doesNotCapture(C->getArgOperandNo(&U));
1002 MemoryLocation SrcLoc =
1004 for (Instruction &
I :
1005 make_range(++
C->getIterator(),
C->getParent()->end())) {
1008 if (
II->getIntrinsicID() == Intrinsic::lifetime_end &&
1009 II->getArgOperand(0) == srcAlloca)
1032 bool NeedMoveGEP =
false;
1033 if (!DT->dominates(cpyDest,
C)) {
1036 if (
GEP &&
GEP->hasAllConstantIndices() &&
1037 DT->dominates(
GEP->getPointerOperand(),
C))
1059 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1060 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1061 cpySrc->
getType() !=
C->getArgOperand(ArgI)->getType())
1065 bool changedArgument =
false;
1066 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1067 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1068 changedArgument =
true;
1069 C->setArgOperand(ArgI, cpyDest);
1072 if (!changedArgument)
1076 if (!isDestSufficientlyAligned) {
1083 GEP->moveBefore(
C->getIterator());
1086 if (SkippedLifetimeStart) {
1087 SkippedLifetimeStart->
moveBefore(
C->getIterator());
1088 MSSAU->moveBefore(MSSA->getMemoryAccess(SkippedLifetimeStart),
1089 MSSA->getMemoryAccess(
C));
1093 if (cpyLoad != cpyStore)
1102bool MemCpyOptPass::processMemCpyMemCpyDependence(MemCpyInst *M,
1104 BatchAAResults &BAA) {
1118 int64_t MForwardOffset = 0;
1119 const DataLayout &
DL =
M->getModule()->getDataLayout();
1122 if (
M->getSource() != MDep->
getDest()) {
1123 std::optional<int64_t>
Offset =
1124 M->getSource()->getPointerOffsetFrom(MDep->
getDest(),
DL);
1127 MForwardOffset = *
Offset;
1130 Value *CopyLength =
M->getLength();
1135 if (MForwardOffset != 0 || MDep->
getLength() != CopyLength) {
1141 if (!MDepLen || !MLen)
1143 if (MDepLen->getZExtValue() < MLen->getZExtValue() + MForwardOffset) {
1146 if (MDepLen->getZExtValue() <= (uint64_t)MForwardOffset)
1150 CopyLength = ConstantInt::get(CopyLength->
getType(),
1151 MDepLen->getZExtValue() - MForwardOffset);
1159 if (NewCopySource && NewCopySource->
use_empty())
1172 MCopyLoc = MCopyLoc.getWithNewSize(
1181 if (MForwardOffset > 0) {
1183 std::optional<int64_t> MDestOffset =
1185 if (MDestOffset == MForwardOffset)
1186 CopySource =
M->getDest();
1188 CopySource = Builder.CreateInBoundsPtrAdd(
1189 CopySource, Builder.getInt64(MForwardOffset));
1193 MCopyLoc = MCopyLoc.getWithNewPtr(CopySource);
1194 if (CopySourceAlign)
1207 if (
writtenBetween(MSSA, BAA, MCopyLoc, MSSA->getMemoryAccess(MDep),
1208 MSSA->getMemoryAccess(M)))
1224 bool UseMemMove =
false;
1229 if (
M->isForceInlined())
1235 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1243 NewM = Builder.CreateMemMove(
M->getDest(),
M->getDestAlign(), CopySource,
1244 CopySourceAlign, CopyLength,
M->isVolatile());
1245 else if (
M->isForceInlined())
1249 NewM = Builder.CreateMemCpyInline(
M->getDest(),
M->getDestAlign(),
1250 CopySource, CopySourceAlign, CopyLength,
1253 NewM = Builder.CreateMemCpy(
M->getDest(),
M->getDestAlign(), CopySource,
1254 CopySourceAlign, CopyLength,
M->isVolatile());
1260 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM,
nullptr, LastDef);
1288bool MemCpyOptPass::processMemSetMemCpyDependence(MemCpyInst *MemCpy,
1290 BatchAAResults &BAA) {
1316 MSSA->getMemoryAccess(MemSet),
1317 MSSA->getMemoryAccess(MemCpy)))
1329 if (DestSize == SrcSize) {
1352 "Preserving debug location based on moving memset within BB.");
1353 Builder.SetCurrentDebugLocation(MemSet->
getDebugLoc());
1359 SrcSize = Builder.CreateZExt(SrcSize, DestSize->
getType());
1361 DestSize = Builder.CreateZExt(DestSize, SrcSize->
getType());
1364 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1365 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1366 Value *MemsetLen = Builder.CreateSelect(
1367 Ule, ConstantInt::getNullValue(DestSize->
getType()), SizeDiff);
1373 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1374 MemSet->
getOperand(1), MemsetLen, Alignment);
1377 "MemCpy must be a MemoryDef");
1382 MSSAU->createMemoryAccessBefore(NewMemSet,
nullptr, LastDef);
1397 if (
II->getIntrinsicID() == Intrinsic::lifetime_start)
1399 return II->getArgOperand(0) == Alloca;
1432bool MemCpyOptPass::performMemCpyToMemSetOptzn(MemCpyInst *MemCpy,
1434 BatchAAResults &BAA) {
1438 int64_t MOffset = 0;
1443 std::optional<int64_t>
Offset =
1454 if (MOffset != 0 || MemSetSize != CopySize) {
1461 if (!CMemSetSize || !CCopySize || MOffset < 0 ||
1462 CCopySize->getZExtValue() + MOffset > CMemSetSize->getZExtValue()) {
1466 if (CMemSetSize && CCopySize) {
1467 uint64_t MemSetSizeVal = CMemSetSize->getZExtValue();
1468 uint64_t MemCpySizeVal = CCopySize->getZExtValue();
1473 uint64_t
Offset = -MOffset;
1474 NewSize = MemCpySizeVal <=
Offset ? 0 : MemCpySizeVal -
Offset;
1475 }
else if (MOffset == 0) {
1476 NewSize = MemSetSizeVal;
1479 MemSetSizeVal <= (uint64_t)MOffset ? 0 : MemSetSizeVal - MOffset;
1481 CopySize = ConstantInt::get(CopySize->
getType(), NewSize);
1493 DestPtr = Builder.CreatePtrAdd(DestPtr, Builder.getInt64(-MOffset));
1499 Builder.CreateMemSet(DestPtr, MemSet->
getOperand(1), CopySize, Align);
1501 auto *NewAccess = MSSAU->createMemoryAccessAfter(NewM,
nullptr, LastDef);
1519bool MemCpyOptPass::performStackMoveOptzn(Instruction *Load, Instruction *Store,
1521 TypeSize
Size, BatchAAResults &BAA) {
1536 if (SrcAlloca == DestAlloca)
1556 if (!SrcOffset || *SrcOffset < *DestOffset || *SrcOffset < 0)
1559 if ((*SrcOffset - *DestOffset) % DestAlloca->
getAlign().
value() != 0)
1563 if (!SrcSize || !DestSize)
1565 if (*SrcSize != *DestSize)
1566 if (!SrcSize->isFixed() || !DestSize->isFixed())
1569 if (
Size != *DestSize || *DestOffset != 0) {
1570 LLVM_DEBUG(
dbgs() <<
"Stack Move: Destination alloca size mismatch\n");
1575 bool MoveSrc = !DT->dominates(SrcAlloca, DestAlloca);
1577 if (!DT->dominates(DestAlloca, SrcAlloca))
1586 SmallVector<Instruction *, 4> LifetimeMarkers;
1587 SmallPtrSet<Instruction *, 4> AAMetadataInstrs;
1589 auto CaptureTrackingWithModRef =
1590 [&](
Instruction *AI, function_ref<bool(Instruction *)> ModRefCallback,
1591 bool &AddressCaptured) ->
bool {
1592 SmallVector<Instruction *, 8> Worklist;
1595 Worklist.
reserve(MaxUsesToExplore);
1596 SmallPtrSet<const Use *, 20> Visited;
1597 while (!Worklist.
empty()) {
1599 for (
const Use &U :
I->uses()) {
1602 if (Visited.
size() >= MaxUsesToExplore) {
1605 <<
"Stack Move: Exceeded max uses to see ModRef, bailing\n");
1608 if (!Visited.
insert(&U).second)
1615 if (UI->mayReadOrWriteMemory()) {
1616 if (UI->isLifetimeStartOrEnd()) {
1625 AAMetadataInstrs.
insert(UI);
1627 if (!ModRefCallback(UI))
1642 ModRefInfo DestModRef = ModRefInfo::NoModRef;
1644 SmallVector<BasicBlock *, 8> ReachabilityWorklist;
1645 auto DestModRefCallback = [&](
Instruction *UI) ->
bool {
1655 if (UI->getParent() ==
Store->getParent()) {
1664 if (UI->comesBefore(Store))
1674 ReachabilityWorklist.
push_back(UI->getParent());
1680 bool DestAddressCaptured =
false;
1681 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback,
1682 DestAddressCaptured))
1685 if (!ReachabilityWorklist.
empty() &&
1687 nullptr, DT,
nullptr))
1703 auto SrcModRefCallback = [&](
Instruction *UI) ->
bool {
1706 if (PDT->dominates(Load, UI) || UI == Load || UI == Store)
1716 bool SrcAddressCaptured =
false;
1717 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback,
1718 SrcAddressCaptured))
1723 if (DestAddressCaptured && SrcAddressCaptured)
1735 if (*SrcSize != *DestSize) {
1738 if (DestSize->getFixedValue() > SrcSize->getFixedValue()) {
1745 Value *NewDestPtr = SrcAlloca;
1746 if (*SrcOffset != *DestOffset) {
1748 NewDestPtr = Builder.CreateInBoundsPtrAdd(
1749 SrcAlloca, Builder.getInt64(*SrcOffset - *DestOffset));
1760 if (!LifetimeMarkers.
empty()) {
1761 for (Instruction *
I : LifetimeMarkers)
1770 for (Instruction *
I : AAMetadataInstrs) {
1771 I->setMetadata(LLVMContext::MD_alias_scope,
nullptr);
1772 I->setMetadata(LLVMContext::MD_noalias,
nullptr);
1773 I->setMetadata(LLVMContext::MD_tbaa,
nullptr);
1774 I->setMetadata(LLVMContext::MD_tbaa_struct,
nullptr);
1777 LLVM_DEBUG(
dbgs() <<
"Stack Move: Performed stack-move optimization\n");
1799 if (
M->isVolatile())
1803 if (
M->getSource() ==
M->getDest()) {
1816 MemoryUseOrDef *MA = MSSA->getMemoryAccess(M);
1823 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1825 M->getDataLayout())) {
1828 M->getRawDest(), ByteVal,
M->getLength(),
M->getDestAlign(),
false);
1831 MSSAU->createMemoryAccessAfter(NewM,
nullptr, LastDef);
1839 BatchAAResults BAA(*AA, EEA);
1843 const MemoryAccess *DestClobber =
1844 MSSA->getWalker()->getClobberingMemoryAccess(AnyClobber, DestLoc, BAA);
1852 if (DestClobber->
getBlock() ==
M->getParent())
1853 if (processMemSetMemCpyDependence(M, MDep, BAA))
1856 MemoryAccess *SrcClobber = MSSA->getWalker()->getClobberingMemoryAccess(
1868 if (Instruction *
MI = MD->getMemoryInst()) {
1871 if (performCallSlotOptzn(M, M,
M->getDest(),
M->getSource(),
1873 M->getDestAlign().valueOrOne(), BAA,
1874 [
C]() -> CallInst * { return C; })) {
1876 <<
" call: " << *
C <<
"\n"
1877 <<
" memcpy: " << *M <<
"\n");
1885 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1888 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1911 if (performStackMoveOptzn(M, M,
M->getDest(),
M->getSource(),
1914 BBI =
M->getNextNode()->getIterator();
1925bool MemCpyOptPass::isMemMoveMemSetDependency(MemMoveInst *M) {
1926 const auto &
DL =
M->getDataLayout();
1927 MemoryUseOrDef *MemMoveAccess = MSSA->getMemoryAccess(M);
1933 auto *MemMoveSourceOp =
M->getSource();
1939 LocationSize MemMoveLocSize = SourceLoc.
Size;
1940 if (
Source->getPointerOperand() !=
M->getDest() ||
1946 uint64_t MemMoveSize = MemMoveLocSize.
getValue();
1947 LocationSize TotalSize =
1949 MemoryLocation CombinedLoc(
M->getDest(), TotalSize);
1953 BatchAAResults BAA(*AA);
1956 MSSA->getWalker()->getClobberingMemoryAccess(FirstDef, CombinedLoc, BAA));
1966 if (!MemSetLength || MemSetLength->getZExtValue() < MemMoveSize)
1983 if (!
M->isVolatile() && isMemMoveMemSetDependency(M)) {
1993 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1997 Type *ArgTys[3] = {
M->getRawDest()->getType(),
M->getRawSource()->getType(),
1998 M->getLength()->getType()};
2000 M->getModule(), Intrinsic::memcpy, ArgTys));
2010bool MemCpyOptPass::processByValArgument(CallBase &CB,
unsigned ArgNo) {
2015 TypeSize ByValSize =
DL.getTypeAllocSize(ByValTy);
2017 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2020 MemCpyInst *MDep =
nullptr;
2021 BatchAAResults BAA(*AA, EEA);
2022 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
2036 if (!C1 || !TypeSize::isKnownGE(
2049 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
2065 MSSA->getMemoryAccess(MDep), CallAccess))
2068 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to byval:\n"
2069 <<
" " << *MDep <<
"\n"
2070 <<
" " << CB <<
"\n");
2093bool MemCpyOptPass::processImmutArgument(CallBase &CB,
unsigned ArgNo) {
2094 BatchAAResults BAA(*AA, EEA);
2118 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(
DL);
2121 if (!AllocaSize || AllocaSize->isScalable())
2124 MemoryUseOrDef *CallAccess = MSSA->getMemoryAccess(&CB);
2128 MemCpyInst *MDep =
nullptr;
2129 MemoryAccess *Clobber = MSSA->getWalker()->getClobberingMemoryAccess(
2145 if (!MDepLen || AllocaSize != MDepLen->getValue())
2152 Align AllocaAlign = AI->getAlign();
2153 if (MemDepAlign < AllocaAlign &&
2165 MSSA->getMemoryAccess(MDep), CallAccess))
2172 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to Immut src:\n"
2173 <<
" " << *MDep <<
"\n"
2174 <<
" " << CB <<
"\n");
2184bool MemCpyOptPass::iterateOnFunction(Function &
F) {
2185 bool MadeChange =
false;
2188 for (BasicBlock &BB :
F) {
2193 if (!DT->isReachableFromEntry(&BB))
2200 bool RepeatInstruction =
false;
2203 MadeChange |= processStore(SI, BI);
2205 RepeatInstruction = processMemSet(M, BI);
2207 RepeatInstruction = processMemCpy(M, BI);
2209 RepeatInstruction = processMemMove(M, BI);
2211 for (
unsigned i = 0, e = CB->
arg_size(); i != e; ++i) {
2213 MadeChange |= processByValArgument(*CB, i);
2215 MadeChange |= processImmutArgument(*CB, i);
2220 if (RepeatInstruction) {
2221 if (BI != BB.
begin())
2239 bool MadeChange =
runImpl(
F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2253 bool MadeChange =
false;
2266 if (!iterateOnFunction(
F))
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseSet and SmallDenseSet classes.
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
This is the interface for a simple mod/ref and alias analysis over globals.
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
static void eraseInstruction(Instruction &I, ICFLoopSafetyInfo &SafetyInfo, MemorySSAUpdater &MSSAU)
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, Instruction *End)
static bool isZeroSize(Value *Size)
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, MemoryDef *Def)
Determine whether the pointer V had only undefined content (due to Def), either because it was freshl...
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End, Instruction **SkippedLifetimeStart=nullptr)
static bool overreadUndefContents(MemorySSA *MSSA, MemCpyInst *MemCpy, MemIntrinsic *MemSrc, BatchAAResults &BAA)
static cl::opt< bool > EnableMemCpyOptWithoutLibcalls("enable-memcpyopt-without-libcalls", cl::Hidden, cl::desc("Enable memcpyopt even when libcalls are disabled"))
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End)
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
uint64_t IntrinsicInst * II
This file contains the declarations for profiling metadata utility functions.
This file defines the make_scope_exit function, which executes user-defined cleanup logic at scope ex...
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
LLVM_ABI bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
void setAllocatedType(Type *Ty)
for use only in special circumstances that need to generically transform a whole instruction (eg: IR ...
Type * getAllocatedType() const
Return the type that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
LLVM_ABI std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
const Value * getArraySize() const
Get the number of elements allocated.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
iterator begin()
Instruction iterator methods.
LLVM_ABI bool isEntryBlock() const
Return true if this is the entry block of the containing function.
InstListType::iterator iterator
Instruction iterators...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT)
Represents analyses that only rely on functions' control flow.
bool doesNotCapture(unsigned OpNo) const
Determine whether this data operand is not captured.
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
bool onlyReadsMemory(unsigned OpNo) const
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
unsigned arg_size() const
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Context-sensitive CaptureAnalysis provider, which computes and caches the earliest common dominator c...
LLVM_ABI void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveBefore(InstListType::iterator InsertPos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs={})
Drop all unknown metadata except for debug locations.
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
TypeSize getValue() const
This class wraps the llvm.memcpy intrinsic.
LLVM_ABI PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Value * getLength() const
Value * getRawDest() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This is the common base class for memset/memcpy/memmove.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
BasicBlock * getBlock() const
AllAccessType::self_iterator getIterator()
Get the iterators for the all access list and the defs only list We default to the all access list.
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static LLVM_ABI MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
LocationSize Size
The maximum size of the location, in address-units, or UnknownSize if the size is not known.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
static LLVM_ABI MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
An analysis that produces MemorySSA for a function.
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Encapsulates MemorySSA, including all data associated with memory accesses.
LLVM_ABI bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
LLVM_ABI void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
LLVM_ABI MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Class that has the common methods + fields of memory uses/defs.
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
PreservedAnalyses & preserve()
Mark an analysis as preserved.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
void setOperand(unsigned i, Value *Val)
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
LLVM_ABI std::optional< int64_t > getPointerOffsetFrom(const Value *Other, const DataLayout &DL) const
If this ptr is provably equal to Other plus a constant offset, return that offset in bytes.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
const ParentTy * getParent() const
reverse_self_iterator getReverseIterator()
self_iterator getIterator()
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
Abstract Attribute helper functions.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
@ BasicBlock
Various leaf nodes.
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
@ User
could "use" a pointer
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
bool capturesAddress(CaptureComponents CC)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
scope_exit(Callable) -> scope_exit< Callable >
LLVM_ABI bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater than or equal to the requested alignment.
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
auto cast_or_null(const Y &Val)
LLVM_ABI unsigned getDefaultMaxUsesToExploreForCaptureTracking()
getDefaultMaxUsesToExploreForCaptureTracking - Return default value of the maximal number of uses to ...
LLVM_ABI bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing funct...
LLVM_ABI Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
auto dyn_cast_or_null(const Y &Val)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
LLVM_ABI Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
LLVM_ABI bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cann...
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
LLVM_ABI bool isKnownNonZero(const Value *V, const SimplifyQuery &Q, unsigned Depth=0)
Return true if the given value is known to be non-zero when defined.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_begin(NodeRef Node)
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
LLVM_ABI bool VerifyMemorySSA
Enables verification of MemorySSA.
RNSuccIterator< NodeRef, BlockT, RegionT > succ_end(NodeRef Node)
DWARFExpression::Operation Op
LLVM_ABI bool isPotentiallyReachableFromMany(SmallVectorImpl< BasicBlock * > &Worklist, const BasicBlock *StopBB, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr, const CycleInfo *CI=nullptr)
Determine whether there is at least one path from a block in 'Worklist' to 'StopBB' without passing t...
LLVM_ABI bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function level.
LLVM_ABI bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
LLVM_ABI Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it i...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
LLVM_ABI void combineAAMetadata(Instruction *K, const Instruction *J)
Combine metadata of two instructions, where instruction J is a memory access that has been merged int...
bool capturesAnything(CaptureComponents CC)
LLVM_ABI UseCaptureInfo DetermineUseCaptureKind(const Use &U, const Value *Base)
Determine what kind of capture behaviour U may exhibit.
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
bool capturesAnyProvenance(CaptureComponents CC)
bool isRefSet(const ModRefInfo MRI)
LLVM_ABI bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can ...
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.
CaptureComponents UseCC
Components captured by this use.
CaptureComponents ResultCC
Components captured by the return value of the user of this Use.