64#define DEBUG_TYPE "memcpyopt"
67 "enable-memcpyopt-without-libcalls",
cl::Hidden,
68 cl::desc(
"Enable memcpyopt even when libcalls are disabled"));
70STATISTIC(NumMemCpyInstr,
"Number of memcpy instructions deleted");
71STATISTIC(NumMemSetInfer,
"Number of memsets inferred");
72STATISTIC(NumMoveToCpy,
"Number of memmoves converted to memcpy");
73STATISTIC(NumCpyToSet,
"Number of memcpys converted to memset");
74STATISTIC(NumCallSlot,
"Number of call slot optimizations performed");
75STATISTIC(NumStackMove,
"Number of stack-move optimizations performed");
104 bool isProfitableToUseMemset(
const DataLayout &
DL)
const;
109bool MemsetRange::isProfitableToUseMemset(
const DataLayout &
DL)
const {
111 if (TheStores.size() >= 4 ||
End-Start >= 16)
return true;
114 if (TheStores.size() < 2)
return false;
119 if (!isa<StoreInst>(SI))
124 if (TheStores.size() == 2)
return false;
137 unsigned MaxIntSize =
DL.getLargestLegalIntTypeSizeInBits() / 8;
140 unsigned NumPointerStores = Bytes / MaxIntSize;
143 unsigned NumByteStores = Bytes % MaxIntSize;
148 return TheStores.size() > NumPointerStores+NumByteStores;
168 bool empty()
const {
return Ranges.empty(); }
170 void addInst(int64_t OffsetFromFirst,
Instruction *Inst) {
171 if (
auto *SI = dyn_cast<StoreInst>(Inst))
172 addStore(OffsetFromFirst, SI);
174 addMemSet(OffsetFromFirst, cast<MemSetInst>(Inst));
177 void addStore(int64_t OffsetFromFirst,
StoreInst *SI) {
178 TypeSize StoreSize =
DL.getTypeStoreSize(
SI->getOperand(0)->getType());
181 SI->getPointerOperand(),
SI->getAlign(), SI);
184 void addMemSet(int64_t OffsetFromFirst,
MemSetInst *MSI) {
185 int64_t
Size = cast<ConstantInt>(MSI->
getLength())->getZExtValue();
198void MemsetRanges::addRange(int64_t Start, int64_t
Size,
Value *
Ptr,
203 Ranges, [=](
const MemsetRange &O) {
return O.End < Start; });
208 if (
I ==
Ranges.end() || End < I->Start) {
209 MemsetRange &
R = *
Ranges.insert(
I, MemsetRange());
213 R.Alignment = Alignment;
214 R.TheStores.push_back(Inst);
219 I->TheStores.push_back(Inst);
223 if (
I->Start <= Start &&
I->End >=
End)
232 if (Start < I->Start) {
235 I->Alignment = Alignment;
243 range_iterator NextI =
I;
244 while (++NextI !=
Ranges.end() &&
End >= NextI->Start) {
246 I->TheStores.append(NextI->TheStores.begin(), NextI->TheStores.end());
247 if (NextI->End >
I->End)
263 assert(Start->getParent() ==
End->getParent() &&
"Must be in same block");
265 if (Start->getFunction()->doesNotThrow())
270 bool RequiresNoCaptureBeforeUnwind;
272 RequiresNoCaptureBeforeUnwind) &&
273 !RequiresNoCaptureBeforeUnwind)
283 I->eraseFromParent();
294 assert(Start->getBlock() ==
End->getBlock() &&
"Only local supported");
297 Instruction *
I = cast<MemoryUseOrDef>(MA).getMemoryInst();
299 auto *II = dyn_cast<IntrinsicInst>(
I);
300 if (II && II->getIntrinsicID() == Intrinsic::lifetime_start &&
301 SkippedLifetimeStart && !*SkippedLifetimeStart) {
302 *SkippedLifetimeStart =
I;
317 if (isa<MemoryUse>(
End)) {
321 return Start->getBlock() !=
End->getBlock() ||
323 make_range(std::next(Start->getIterator()),
End->getIterator()),
325 if (isa<MemoryUse>(&Acc))
327 Instruction *AccInst =
328 cast<MemoryUseOrDef>(&Acc)->getMemoryInst();
329 return isModSet(AA.getModRefInfo(AccInst, Loc));
335 End->getDefiningAccess(), Loc, AA);
343 unsigned KnownIDs[] = {LLVMContext::MD_tbaa, LLVMContext::MD_alias_scope,
344 LLVMContext::MD_noalias,
345 LLVMContext::MD_invariant_group,
346 LLVMContext::MD_access_group};
360 if (
auto *SI = dyn_cast<StoreInst>(StartInst))
361 if (
DL.getTypeStoreSize(
SI->getOperand(0)->getType()).isScalable())
376 for (++BI; !BI->isTerminator(); ++BI) {
377 auto *CurrentAcc = cast_or_null<MemoryUseOrDef>(
380 MemInsertPoint = CurrentAcc;
384 if (
auto *CB = dyn_cast<CallBase>(BI)) {
385 if (CB->onlyAccessesInaccessibleMemory())
389 if (!isa<StoreInst>(BI) && !isa<MemSetInst>(BI)) {
393 if (BI->mayWriteToMemory() || BI->mayReadFromMemory())
398 if (
auto *NextStore = dyn_cast<StoreInst>(BI)) {
400 if (!NextStore->isSimple())
break;
402 Value *StoredVal = NextStore->getValueOperand();
410 if (
DL.getTypeStoreSize(StoredVal->
getType()).isScalable())
415 if (isa<UndefValue>(ByteVal) && StoredByte)
416 ByteVal = StoredByte;
417 if (ByteVal != StoredByte)
421 std::optional<int64_t>
Offset =
422 NextStore->getPointerOperand()->getPointerOffsetFrom(StartPtr,
DL);
428 auto *MSI = cast<MemSetInst>(BI);
430 if (MSI->isVolatile() || ByteVal != MSI->getValue() ||
431 !isa<ConstantInt>(MSI->getLength()))
435 std::optional<int64_t>
Offset =
436 MSI->getDest()->getPointerOffsetFrom(StartPtr,
DL);
452 Ranges.addInst(0, StartInst);
462 for (
const MemsetRange &Range : Ranges) {
463 if (
Range.TheStores.size() == 1)
continue;
466 if (!
Range.isProfitableToUseMemset(
DL))
471 StartPtr =
Range.StartPtr;
473 AMemSet = Builder.CreateMemSet(StartPtr, ByteVal,
Range.End -
Range.Start,
480 dbgs() <<
"With: " << *AMemSet <<
'\n');
481 if (!
Range.TheStores.empty())
487 AMemSet,
nullptr, MemInsertPoint)
489 AMemSet,
nullptr, MemInsertPoint));
491 MemInsertPoint = NewDef;
495 eraseInstruction(SI);
516 auto AddArg = [&](
Value *Arg) {
517 auto *
I = dyn_cast<Instruction>(Arg);
518 if (
I &&
I->getParent() ==
SI->getParent()) {
520 if (
I ==
P)
return false;
525 if (!AddArg(
SI->getPointerOperand()))
539 for (
auto I = --
SI->getIterator(),
E =
P->getIterator();
I !=
E; --
I) {
549 bool NeedLift =
false;
571 else if (
const auto *Call = dyn_cast<CallBase>(
C)) {
577 }
else if (isa<LoadInst>(
C) || isa<StoreInst>(
C) || isa<VAArgInst>(
C)) {
583 MemLocs.push_back(
ML);
604 MemInsertPoint = cast<MemoryUseOrDef>(--MA->getIterator());
620 assert(MemInsertPoint &&
"Must have found insert point");
642 if (
T->isAggregateType() &&
644 (TLI->
has(LibFunc_memcpy) && TLI->
has(LibFunc_memmove)))) {
665 if (!moveUp(SI,
P, LI))
676 bool UseMemMove =
false;
681 Value *
Size = Builder.CreateTypeSize(Builder.getInt64Ty(),
682 DL.getTypeStoreSize(
T));
685 M = Builder.CreateMemMove(
686 SI->getPointerOperand(),
SI->getAlign(),
689 M = Builder.CreateMemCpy(
690 SI->getPointerOperand(),
SI->getAlign(),
692 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
694 LLVM_DEBUG(
dbgs() <<
"Promoting " << *LI <<
" to " << *SI <<
" => "
700 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
702 eraseInstruction(SI);
703 eraseInstruction(LI);
707 BBI =
M->getIterator();
716 auto GetCall = [&]() ->
CallInst * {
719 if (
auto *LoadClobber = dyn_cast<MemoryUseOrDef>(
721 return dyn_cast_or_null<CallInst>(LoadClobber->getMemoryInst());
725 bool Changed = performCallSlotOptzn(
726 LI, SI,
SI->getPointerOperand()->stripPointerCasts(),
728 DL.getTypeStoreSize(
SI->getOperand(0)->getType()),
729 std::min(
SI->getAlign(), LI->
getAlign()), BAA, GetCall);
731 eraseInstruction(SI);
732 eraseInstruction(LI);
740 if (
auto *DestAlloca = dyn_cast<AllocaInst>(
SI->getPointerOperand())) {
742 if (performStackMoveOptzn(LI, SI, DestAlloca, SrcAlloca,
743 DL.getTypeStoreSize(
T), BAA)) {
745 BBI =
SI->getNextNonDebugInstruction()->getIterator();
746 eraseInstruction(SI);
747 eraseInstruction(LI);
758 if (!
SI->isSimple())
return false;
766 if (
SI->getMetadata(LLVMContext::MD_nontemporal))
771 Value *StoredVal =
SI->getValueOperand();
779 if (
auto *LI = dyn_cast<LoadInst>(StoredVal))
780 return processStoreOfLoad(SI, LI,
DL, BBI);
795 auto *
V =
SI->getOperand(0);
797 if (
Instruction *
I = tryMergingIntoMemset(SI,
SI->getPointerOperand(),
799 BBI =
I->getIterator();
806 auto *
T =
V->getType();
807 if (
T->isAggregateType()) {
810 auto *
M = Builder.CreateMemSet(
SI->getPointerOperand(), ByteVal,
Size,
812 M->copyMetadata(*SI, LLVMContext::MD_DIAssignID);
814 LLVM_DEBUG(
dbgs() <<
"Promoting " << *SI <<
" to " << *M <<
"\n");
820 M,
nullptr, StoreDef);
821 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
false);
823 eraseInstruction(SI);
827 BBI =
M->getIterator();
841 BBI =
I->getIterator();
850bool MemCpyOptPass::performCallSlotOptzn(
Instruction *cpyLoad,
874 auto *srcAlloca = dyn_cast<AllocaInst>(cpySrc);
878 ConstantInt *srcArraySize = dyn_cast<ConstantInt>(srcAlloca->getArraySize());
883 TypeSize SrcAllocaSize =
DL.getTypeAllocSize(srcAlloca->getAllocatedType());
889 if (cpySize < srcSize)
898 if (
F->isIntrinsic() &&
F->getIntrinsicID() == Intrinsic::lifetime_start)
902 if (
C->getParent() != cpyStore->
getParent()) {
916 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest pointer modified after call\n");
923 if (SkippedLifetimeStart) {
925 dyn_cast<Instruction>(SkippedLifetimeStart->
getOperand(1));
926 if (LifetimeArg && LifetimeArg->getParent() ==
C->getParent() &&
927 C->comesBefore(LifetimeArg))
933 bool ExplicitlyDereferenceableOnly;
935 ExplicitlyDereferenceableOnly) ||
938 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest pointer not dereferenceable\n");
957 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest may be visible through unwinding\n");
962 Align srcAlign = srcAlloca->getAlign();
963 bool isDestSufficientlyAligned = srcAlign <= cpyDestAlign;
966 if (!isDestSufficientlyAligned && !isa<AllocaInst>(cpyDest)) {
967 LLVM_DEBUG(
dbgs() <<
"Call Slot: Dest not sufficiently aligned\n");
976 while (!srcUseList.empty()) {
977 User *
U = srcUseList.pop_back_val();
979 if (isa<BitCastInst>(U) || isa<AddrSpaceCastInst>(U)) {
983 if (
const auto *
G = dyn_cast<GetElementPtrInst>(U)) {
984 if (!
G->hasAllZeroIndices())
990 if (
const auto *
IT = dyn_cast<IntrinsicInst>(U))
991 if (
IT->isLifetimeStartOrEnd())
994 if (U !=
C && U != cpyLoad)
1000 bool SrcIsCaptured =
any_of(
C->args(), [&](
Use &U) {
1001 return U->stripPointerCasts() == cpySrc &&
1002 !C->doesNotCapture(C->getArgOperandNo(&U));
1008 if (SrcIsCaptured) {
1023 make_range(++
C->getIterator(),
C->getParent()->end())) {
1025 if (
auto *II = dyn_cast<IntrinsicInst>(&
I)) {
1026 if (II->getIntrinsicID() == Intrinsic::lifetime_end &&
1027 II->getArgOperand(1)->stripPointerCasts() == srcAlloca &&
1028 cast<ConstantInt>(II->getArgOperand(0))->uge(srcSize))
1033 if (isa<ReturnInst>(&
I))
1051 bool NeedMoveGEP =
false;
1054 auto *
GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1055 if (
GEP &&
GEP->hasAllConstantIndices() &&
1078 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1079 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc &&
1080 cpySrc->
getType() !=
C->getArgOperand(ArgI)->getType())
1084 bool changedArgument =
false;
1085 for (
unsigned ArgI = 0; ArgI <
C->arg_size(); ++ArgI)
1086 if (
C->getArgOperand(ArgI)->stripPointerCasts() == cpySrc) {
1087 changedArgument =
true;
1088 C->setArgOperand(ArgI, cpyDest);
1091 if (!changedArgument)
1095 if (!isDestSufficientlyAligned) {
1096 assert(isa<AllocaInst>(cpyDest) &&
"Can only increase alloca alignment!");
1097 cast<AllocaInst>(cpyDest)->setAlignment(srcAlign);
1101 auto *
GEP = dyn_cast<GetElementPtrInst>(cpyDest);
1105 if (SkippedLifetimeStart) {
1112 if (cpyLoad != cpyStore)
1121bool MemCpyOptPass::processMemCpyMemCpyDependence(
MemCpyInst *M,
1140 auto *MDepLen = dyn_cast<ConstantInt>(MDep->
getLength());
1141 auto *MLen = dyn_cast<ConstantInt>(
M->getLength());
1142 if (!MDepLen || !MLen || MDepLen->getZExtValue() < MLen->getZExtValue())
1166 bool UseMemMove =
false;
1171 if (isa<MemCpyInlineInst>(M))
1177 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy->memcpy src:\n"
1178 << *MDep <<
'\n' << *M <<
'\n');
1185 NewM = Builder.CreateMemMove(
M->getRawDest(),
M->getDestAlign(),
1187 M->getLength(),
M->isVolatile());
1188 else if (isa<MemCpyInlineInst>(M)) {
1192 NewM = Builder.CreateMemCpyInline(
1196 NewM = Builder.CreateMemCpy(
M->getRawDest(),
M->getDestAlign(),
1198 M->getLength(),
M->isVolatile());
1204 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1207 eraseInstruction(M);
1231bool MemCpyOptPass::processMemSetMemCpyDependence(
MemCpyInst *MemCpy,
1261 if (DestSize == SrcSize) {
1262 eraseInstruction(MemSet);
1273 if (
auto *SrcSizeC = dyn_cast<ConstantInt>(SrcSize))
1284 "Preserving debug location based on moving memset within BB.");
1285 Builder.SetCurrentDebugLocation(MemSet->
getDebugLoc());
1291 SrcSize = Builder.CreateZExt(SrcSize, DestSize->
getType());
1293 DestSize = Builder.CreateZExt(DestSize, SrcSize->
getType());
1296 Value *Ule = Builder.CreateICmpULE(DestSize, SrcSize);
1297 Value *SizeDiff = Builder.CreateSub(DestSize, SrcSize);
1298 Value *MemsetLen = Builder.CreateSelect(
1301 Builder.CreateMemSet(Builder.CreatePtrAdd(Dest, SrcSize),
1302 MemSet->
getOperand(1), MemsetLen, Alignment);
1305 "MemCpy must be a MemoryDef");
1311 NewMemSet,
nullptr, LastDef);
1312 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1314 eraseInstruction(MemSet);
1325 if (
auto *II = dyn_cast_or_null<IntrinsicInst>(Def->getMemoryInst())) {
1326 if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
1327 auto *LTSize = cast<ConstantInt>(II->getArgOperand(0));
1329 if (
auto *CSize = dyn_cast<ConstantInt>(
Size)) {
1331 LTSize->getZExtValue() >= CSize->getZExtValue())
1341 const DataLayout &
DL = Alloca->getModule()->getDataLayout();
1342 if (std::optional<TypeSize> AllocaSize =
1343 Alloca->getAllocationSize(
DL))
1344 if (*AllocaSize == LTSize->getValue())
1366bool MemCpyOptPass::performMemCpyToMemSetOptzn(
MemCpyInst *MemCpy,
1377 if (MemSetSize != CopySize) {
1382 auto *CMemSetSize = dyn_cast<ConstantInt>(MemSetSize);
1387 auto *CCopySize = dyn_cast<ConstantInt>(CopySize);
1390 if (CCopySize->getZExtValue() > CMemSetSize->getZExtValue()) {
1396 bool CanReduceSize =
false;
1400 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1402 CanReduceSize =
true;
1406 CopySize = MemSetSize;
1417 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1450 if (!SrcSize ||
Size != *SrcSize) {
1451 LLVM_DEBUG(
dbgs() <<
"Stack Move: Source alloca size mismatch\n");
1455 if (!DestSize ||
Size != *DestSize) {
1456 LLVM_DEBUG(
dbgs() <<
"Stack Move: Destination alloca size mismatch\n");
1470 bool SrcNotDom =
false;
1474 bool CanBeNull, CanBeFreed;
1475 return V->getPointerDereferenceableBytes(
DL, CanBeNull, CanBeFreed);
1478 auto CaptureTrackingWithModRef =
1484 Worklist.
reserve(MaxUsesToExplore);
1486 while (!Worklist.
empty()) {
1489 for (
const Use &U :
I->uses()) {
1490 auto *UI = cast<Instruction>(
U.getUser());
1496 if (Visited.
size() >= MaxUsesToExplore) {
1499 <<
"Stack Move: Exceeded max uses to see ModRef, bailing\n");
1502 if (!Visited.
insert(&U).second)
1512 if (UI->isLifetimeStartOrEnd()) {
1518 int64_t
Size = cast<ConstantInt>(UI->getOperand(0))->getSExtValue();
1519 if (
Size < 0 ||
Size == DestSize) {
1524 if (UI->hasMetadata(LLVMContext::MD_noalias))
1525 NoAliasInstrs.
insert(UI);
1526 if (!ModRefCallback(UI))
1541 auto DestModRefCallback = [&](
Instruction *UI) ->
bool {
1551 if (UI->getParent() ==
Store->getParent()) {
1560 if (UI->comesBefore(Store))
1570 ReachabilityWorklist.
push_back(UI->getParent());
1576 if (!CaptureTrackingWithModRef(DestAlloca, DestModRefCallback))
1579 if (!ReachabilityWorklist.
empty() &&
1581 nullptr, DT,
nullptr))
1589 auto SrcModRefCallback = [&](
Instruction *UI) ->
bool {
1602 if (!CaptureTrackingWithModRef(SrcAlloca, SrcModRefCallback))
1616 eraseInstruction(DestAlloca);
1624 if (!LifetimeMarkers.
empty()) {
1626 eraseInstruction(
I);
1634 I->setMetadata(LLVMContext::MD_noalias,
nullptr);
1636 LLVM_DEBUG(
dbgs() <<
"Stack Move: Performed staack-move optimization\n");
1642 if (
auto *
I = dyn_cast<Instruction>(
Size))
1646 if (
auto *
C = dyn_cast<Constant>(
Size))
1647 return isa<UndefValue>(
C) ||
C->isNullValue();
1658 if (
M->isVolatile())
return false;
1661 if (
M->getSource() ==
M->getDest()) {
1663 eraseInstruction(M);
1671 eraseInstruction(M);
1681 if (
auto *GV = dyn_cast<GlobalVariable>(
M->getSource()))
1682 if (GV->isConstant() && GV->hasDefinitiveInitializer())
1684 M->getModule()->getDataLayout())) {
1687 M->getRawDest(), ByteVal,
M->getLength(),
M->getDestAlign(),
false);
1688 auto *LastDef = cast<MemoryDef>(MA);
1691 MSSAU->
insertDef(cast<MemoryDef>(NewAccess),
true);
1693 eraseInstruction(M);
1709 if (
auto *MD = dyn_cast<MemoryDef>(DestClobber))
1710 if (
auto *MDep = dyn_cast_or_null<MemSetInst>(MD->getMemoryInst()))
1711 if (DestClobber->
getBlock() ==
M->getParent())
1712 if (processMemSetMemCpyDependence(M, MDep, BAA))
1726 if (
auto *MD = dyn_cast<MemoryDef>(SrcClobber)) {
1728 if (
auto *CopySize = dyn_cast<ConstantInt>(
M->getLength())) {
1729 if (
auto *
C = dyn_cast<CallInst>(
MI)) {
1730 if (performCallSlotOptzn(M, M,
M->getDest(),
M->getSource(),
1732 M->getDestAlign().valueOrOne(), BAA,
1735 <<
" call: " << *
C <<
"\n"
1736 <<
" memcpy: " << *M <<
"\n");
1737 eraseInstruction(M);
1743 if (
auto *MDep = dyn_cast<MemCpyInst>(
MI))
1744 if (processMemCpyMemCpyDependence(M, MDep, BAA))
1746 if (
auto *MDep = dyn_cast<MemSetInst>(
MI)) {
1747 if (performMemCpyToMemSetOptzn(M, MDep, BAA)) {
1749 eraseInstruction(M);
1758 eraseInstruction(M);
1767 auto *DestAlloca = dyn_cast<AllocaInst>(
M->getDest());
1770 auto *SrcAlloca = dyn_cast<AllocaInst>(
M->getSource());
1776 if (performStackMoveOptzn(M, M, DestAlloca, SrcAlloca,
1779 BBI =
M->getNextNonDebugInstruction()->getIterator();
1780 eraseInstruction(M);
1790bool MemCpyOptPass::processMemMove(
MemMoveInst *M) {
1795 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Optimizing memmove -> memcpy: " << *M
1799 Type *ArgTys[3] = {
M->getRawDest()->getType(),
1800 M->getRawSource()->getType(),
1801 M->getLength()->getType() };
1803 Intrinsic::memcpy, ArgTys));
1813bool MemCpyOptPass::processByValArgument(
CallBase &CB,
unsigned ArgNo) {
1818 TypeSize ByValSize =
DL.getTypeAllocSize(ByValTy);
1827 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1828 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1838 auto *C1 = dyn_cast<ConstantInt>(MDep->
getLength());
1846 if (!ByValAlign)
return false;
1851 if ((!MemDepAlign || *MemDepAlign < *ByValAlign) &&
1870 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to byval:\n"
1871 <<
" " << *MDep <<
"\n"
1872 <<
" " << CB <<
"\n");
1895bool MemCpyOptPass::processImmutArgument(
CallBase &CB,
unsigned ArgNo) {
1910 std::optional<TypeSize> AllocaSize = AI->getAllocationSize(
DL);
1913 if (!AllocaSize || AllocaSize->isScalable())
1924 if (
auto *MD = dyn_cast<MemoryDef>(Clobber))
1925 MDep = dyn_cast_or_null<MemCpyInst>(MD->getMemoryInst());
1937 auto *MDepLen = dyn_cast<ConstantInt>(MDep->
getLength());
1938 if (!MDepLen || AllocaSize != MDepLen->getValue())
1945 Align AllocaAlign = AI->getAlign();
1946 if (MemDepAlign < AllocaAlign &&
1965 LLVM_DEBUG(
dbgs() <<
"MemCpyOptPass: Forwarding memcpy to Immut src:\n"
1966 <<
" " << *MDep <<
"\n"
1967 <<
" " << CB <<
"\n");
1977bool MemCpyOptPass::iterateOnFunction(
Function &
F) {
1978 bool MadeChange =
false;
1993 bool RepeatInstruction =
false;
1995 if (
auto *SI = dyn_cast<StoreInst>(
I))
1996 MadeChange |= processStore(SI, BI);
1997 else if (
auto *M = dyn_cast<MemSetInst>(
I))
1998 RepeatInstruction = processMemSet(M, BI);
1999 else if (
auto *M = dyn_cast<MemCpyInst>(
I))
2000 RepeatInstruction = processMemCpy(M, BI);
2001 else if (
auto *M = dyn_cast<MemMoveInst>(
I))
2002 RepeatInstruction = processMemMove(M);
2003 else if (
auto *CB = dyn_cast<CallBase>(
I)) {
2004 for (
unsigned i = 0, e = CB->
arg_size(); i != e; ++i) {
2006 MadeChange |= processByValArgument(*CB, i);
2008 MadeChange |= processImmutArgument(*CB, i);
2013 if (RepeatInstruction) {
2014 if (BI != BB.
begin())
2032 bool MadeChange =
runImpl(
F, &TLI, AA, AC, DT, PDT, &MSSA->getMSSA());
2046 bool MadeChange =
false;
2057 if (!iterateOnFunction(
F))
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< ITMode > IT(cl::desc("IT block support"), cl::Hidden, cl::init(DefaultIT), cl::values(clEnumValN(DefaultIT, "arm-default-it", "Generate any type of IT block"), clEnumValN(RestrictedIT, "arm-restrict-it", "Disallow complex IT blocks")))
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseSet and SmallDenseSet classes.
This is the interface for a simple mod/ref and alias analysis over globals.
static bool mayBeVisibleThroughUnwinding(Value *V, Instruction *Start, Instruction *End)
static bool isZeroSize(Value *Size)
static void combineAAMetadata(Instruction *ReplInst, Instruction *I)
static bool accessedBetween(BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End, Instruction **SkippedLifetimeStart=nullptr)
static bool hasUndefContents(MemorySSA *MSSA, BatchAAResults &AA, Value *V, MemoryDef *Def, Value *Size)
Determine whether the instruction has undefined content for the given Size, either because it was fre...
static cl::opt< bool > EnableMemCpyOptWithoutLibcalls("enable-memcpyopt-without-libcalls", cl::Hidden, cl::desc("Enable memcpyopt even when libcalls are disabled"))
static bool writtenBetween(MemorySSA *MSSA, BatchAAResults &AA, MemoryLocation Loc, const MemoryUseOrDef *Start, const MemoryUseOrDef *End)
This file provides utility analysis objects describing memory locations.
This file exposes an interface to building/using memory SSA to walk memory instructions using a use/d...
Module.h This file contains the declarations for the Module class.
This header defines various interfaces for pass management in LLVM.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
A manager for alias analyses.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
an instruction to allocate memory on the stack
bool isStaticAlloca() const
Return true if this alloca is in the entry block of the function and is a constant size.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
unsigned getAddressSpace() const
Return the address space for the allocation.
std::optional< TypeSize > getAllocationSize(const DataLayout &DL) const
Get allocation size in bytes.
void setAlignment(Align Align)
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
bool isEntryBlock() const
Return true if this is the entry block of the containing function.
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
bool isMustAlias(const MemoryLocation &LocA, const MemoryLocation &LocB)
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
ModRefInfo callCapturesBefore(const Instruction *I, const MemoryLocation &MemLoc, DominatorTree *DT)
Represents analyses that only rely on functions' control flow.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isByValArgument(unsigned ArgNo) const
Determine whether this argument is passed by value.
MaybeAlign getParamAlign(unsigned ArgNo) const
Extract the alignment for a call or parameter (0=unknown).
bool onlyReadsMemory(unsigned OpNo) const
Type * getParamByValType(unsigned ArgNo) const
Extract the byval type for a call or parameter.
Value * getArgOperand(unsigned i) const
void setArgOperand(unsigned i, Value *v)
unsigned arg_size() const
Function * getCaller()
Helper to get the caller (the parent function).
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Module * getParent()
Get the module that this global value is contained inside of...
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
void mergeDIAssignID(ArrayRef< const Instruction * > SourceInstructions)
Merge the DIAssignID metadata from this instruction and those attached to instructions in SourceInstr...
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
const BasicBlock * getParent() const
void setDebugLoc(DebugLoc Loc)
Set the debug location information for this instruction.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
void dropUnknownNonDebugMetadata(ArrayRef< unsigned > KnownIDs)
Drop all unknown metadata except for debug locations.
An instruction for reading from memory.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
static LocationSize precise(uint64_t Value)
This class wraps the llvm.memcpy intrinsic.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
bool runImpl(Function &F, TargetLibraryInfo *TLI, AAResults *AA, AssumptionCache *AC, DominatorTree *DT, PostDominatorTree *PDT, MemorySSA *MSSA)
Value * getLength() const
Value * getRawDest() const
Value * getDest() const
This is just like getRawDest, but it strips off any cast instructions (including addrspacecast) that ...
MaybeAlign getDestAlign() const
This class wraps the llvm.memmove intrinsic.
This class wraps the llvm.memset and llvm.memset.inline intrinsics.
Value * getRawSource() const
Return the arguments to the instruction.
MaybeAlign getSourceAlign() const
Value * getSource() const
This is just like getRawSource, but it strips off any cast instructions that feed it,...
BasicBlock * getBlock() const
Represents a read-write access to memory, whether it is a must-alias, or a may-alias.
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getForSource(const MemTransferInst *MTI)
Return a location representing the source of a memory transfer.
static MemoryLocation getForDest(const MemIntrinsic *MI)
Return a location representing the destination of a memory set or transfer.
An analysis that produces MemorySSA for a function.
MemorySSA * getMemorySSA() const
Get handle on MemorySSA.
MemoryUseOrDef * createMemoryAccessBefore(Instruction *I, MemoryAccess *Definition, MemoryUseOrDef *InsertPt)
Create a MemoryAccess in MemorySSA before an existing MemoryAccess.
void insertDef(MemoryDef *Def, bool RenameUses=false)
Insert a definition into the MemorySSA IR.
void moveAfter(MemoryUseOrDef *What, MemoryUseOrDef *Where)
void removeMemoryAccess(MemoryAccess *, bool OptimizePhis=false)
Remove a MemoryAccess from MemorySSA, including updating all definitions and uses.
MemoryUseOrDef * createMemoryAccessAfter(Instruction *I, MemoryAccess *Definition, MemoryAccess *InsertPt)
Create a MemoryAccess in MemorySSA after an existing MemoryAccess.
void moveBefore(MemoryUseOrDef *What, MemoryUseOrDef *Where)
MemoryAccess * getClobberingMemoryAccess(const Instruction *I, BatchAAResults &AA)
Given a memory Mod/Ref/ModRef'ing instruction, calling this will give you the nearest dominating Memo...
Encapsulates MemorySSA, including all data associated with memory accesses.
bool dominates(const MemoryAccess *A, const MemoryAccess *B) const
Given two memory accesses in potentially different blocks, determine whether MemoryAccess A dominates...
void verifyMemorySSA(VerificationLevel=VerificationLevel::Fast) const
Verify that MemorySSA is self consistent (IE definitions dominate all uses, uses appear in the right ...
MemorySSAWalker * getWalker()
MemoryUseOrDef * getMemoryAccess(const Instruction *I) const
Given a memory Mod/Ref'ing instruction, get the MemorySSA access associated with it.
bool isLiveOnEntryDef(const MemoryAccess *MA) const
Return true if MA represents the live on entry value.
Class that has the common methods + fields of memory uses/defs.
MemoryAccess * getDefiningAccess() const
Get the access that produces the memory state used by this Use.
Instruction * getMemoryInst() const
Get the instruction that this MemoryUse represents.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Analysis pass which computes a PostDominatorTree.
PostDominatorTree Class - Concrete subclass of DominatorTree that is used to compute the post-dominat...
bool dominates(const Instruction *I1, const Instruction *I2) const
Return true if I1 dominates I2.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
void preserve()
Mark an analysis as preserved.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void reserve(size_type N)
typename SuperClass::const_iterator const_iterator
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
typename SuperClass::iterator iterator
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool has(LibFunc F) const
Tests whether a library function is available.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
The instances of the Type class are immutable: once they are created, they are never changed.
unsigned getIntegerBitWidth() const
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
A Use represents the edge between a Value definition and its users.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
bool hasOneUse() const
Return true if there is exactly one use of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
constexpr ScalarTy getFixedValue() const
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
static constexpr bool isKnownGE(const FixedOrScalableQuantity &LHS, const FixedOrScalableQuantity &RHS)
An efficient, type-erasing, non-owning reference to a callable.
reverse_self_iterator getReverseIterator()
self_iterator getIterator()
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
@ C
The default llvm calling convention, compatible with C.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
const_iterator end(StringRef path)
Get end iterator over path.
This is an optimization pass for GlobalISel generic memory operations.
Interval::succ_iterator succ_end(Interval *I)
bool isPotentiallyReachableFromMany(SmallVectorImpl< BasicBlock * > &Worklist, const BasicBlock *StopBB, const SmallPtrSetImpl< BasicBlock * > *ExclusionSet, const DominatorTree *DT=nullptr, const LoopInfo *LI=nullptr)
Determine whether there is at least one path from a block in 'Worklist' to 'StopBB' without passing through any blocks in 'ExclusionSet'.
bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty, Align Alignment, const DataLayout &DL, const Instruction *CtxI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr, const TargetLibraryInfo *TLI=nullptr)
Returns true if V is always a dereferenceable pointer with alignment greater or equal than requested.
UseCaptureKind DetermineUseCaptureKind(const Use &U, llvm::function_ref< bool(Value *, const DataLayout &)> IsDereferenceableOrNull)
Determine what kind of capture behaviour U may exhibit.
auto partition_point(R &&Range, Predicate P)
Binary search for the first iterator in a range where a predicate is false.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
bool PointerMayBeCapturedBefore(const Value *V, bool ReturnCaptures, bool StoreCaptures, const Instruction *I, const DominatorTree *DT, bool IncludeI=false, unsigned MaxUsesToExplore=0, const LoopInfo *LI=nullptr)
PointerMayBeCapturedBefore - Return true if this pointer value may be captured by the enclosing function (which is required to exist) before the given instruction.
Interval::succ_iterator succ_begin(Interval *I)
succ_begin/succ_end - define methods so that Intervals may be used just like BasicBlocks can with the succ_* functions and iterators.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
unsigned getDefaultMaxUsesToExploreForCaptureTracking()
getDefaultMaxUsesToExploreForCaptureTracking - Return default value of the maximal number of uses to ...
Value * simplifyInstruction(Instruction *I, const SimplifyQuery &Q)
See if we can compute a simplified version of this instruction.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
auto reverse(ContainerTy &&C)
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
bool isNotVisibleOnUnwind(const Value *Object, bool &RequiresNoCaptureBeforeUnwind)
Return true if Object memory is not visible after an unwind, in the sense that program semantics cannot depend on Object containing any particular value on unwind.
void combineMetadata(Instruction *K, const Instruction *J, ArrayRef< unsigned > KnownIDs, bool DoesKMove)
Combine the metadata of two instructions so that K can replace J.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
@ NoModRef
The access neither references nor modifies the value stored in memory.
bool VerifyMemorySSA
Enables verification of MemorySSA.
bool isIdentifiedFunctionLocal(const Value *V)
Return true if V is unambiguously identified at the function-level.
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one of its successors (including the next instruction that follows within a basic block).
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Value * isBytewiseValue(Value *V, const DataLayout &DL)
If the specified value can be set by repeating the same byte in memory, return the i8 value that it is represented with.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isRefSet(const ModRefInfo MRI)
bool isWritableObject(const Value *Object, bool &ExplicitlyDereferenceableOnly)
Return true if the Object is writable, in the sense that any location based on this pointer that can be loaded can also be stored to without trapping.
This struct is a compact representation of a valid (non-zero power of two) alignment.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Align valueOrOne() const
For convenience, returns a valid alignment or 1 if undefined.