72#define DEBUG_TYPE "loop-accesses"
76 cl::desc(
"Sets the SIMD width. Zero is autoselect."),
82 cl::desc(
"Sets the vectorization interleave count. "
83 "Zero is autoselect."),
90 cl::desc(
"When performing memory disambiguation checks at runtime do not "
91 "generate more than this number of comparisons (default = 8)."),
98 cl::desc(
"Maximum number of comparisons done when trying to merge "
99 "runtime memory checks. (default = 100)"),
108 cl::desc(
"Maximum number of dependences collected by "
109 "loop-access analysis (default = 100)"),
125 cl::desc(
"Enable symbolic stride memory access versioning"));
130 "store-to-load-forwarding-conflict-detection",
cl::Hidden,
131 cl::desc(
"Enable conflict detection in loop-access analysis"),
136 cl::desc(
"Maximum recursion depth when finding forked SCEVs (default = 5)"),
140 return ::VectorizationInterleave.getNumOccurrences() > 0;
144 if (
auto *CI = dyn_cast<CastInst>(V))
145 if (CI->getOperand(0)->getType()->isIntegerTy())
146 return CI->getOperand(0);
158 if (
SI == PtrToStride.
end())
165 const auto *U = cast<SCEVUnknown>(SE->
getSCEV(StrideVal));
173 <<
" by: " << *Expr <<
"\n");
182 ->getPointerAddressSpace()),
183 NeedsFreeze(RtCheck.Pointers[
Index].NeedsFreeze) {
201 Type *AccessTy,
bool WritePtr,
202 unsigned DepSetId,
unsigned ASId,
211 ScStart = ScEnd = PtrExpr;
214 assert(AR &&
"Invalid addrec expression");
223 if (
const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
224 if (CStep->getValue()->isNegative())
236 Type *IdxTy =
DL.getIndexType(
Ptr->getType());
240 Pointers.emplace_back(
Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
244void RuntimePointerChecking::tryToCreateDiffCheck(
246 if (!CanUseDiffCheck)
253 CanUseDiffCheck =
false;
264 CanUseDiffCheck =
false;
274 if (AccSrc.
size() != 1 || AccSink.
size() != 1) {
275 CanUseDiffCheck =
false;
279 if (AccSink[0] < AccSrc[0])
282 auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
283 auto *SinkAR = dyn_cast<SCEVAddRecExpr>(
Sink->Expr);
286 CanUseDiffCheck =
false;
296 if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) {
297 CanUseDiffCheck =
false;
301 SinkAR->getLoop()->getHeader()->getModule()->getDataLayout();
303 std::max(
DL.getTypeAllocSize(SrcTy),
DL.getTypeAllocSize(DstTy));
308 auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
309 if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
310 Step->getAPInt().abs() != AllocSize) {
311 CanUseDiffCheck =
false;
320 if (Step->getValue()->isNegative())
325 if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
326 isa<SCEVCouldNotCompute>(SrcStartInt)) {
327 CanUseDiffCheck =
false;
330 DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
331 Src->NeedsFreeze ||
Sink->NeedsFreeze);
343 tryToCreateDiffCheck(CGI, CGJ);
344 Checks.
push_back(std::make_pair(&CGI, &CGJ));
351void RuntimePointerChecking::generateChecks(
354 groupChecks(DepCands, UseDependencies);
360 for (
unsigned I = 0, EI = M.Members.size(); EI !=
I; ++
I)
361 for (
unsigned J = 0, EJ =
N.Members.size(); EJ != J; ++J)
376 if (
C->getValue()->isNegative())
385 RtCheck.
Pointers[
Index].PointerValue->getType()->getPointerAddressSpace(),
390 const SCEV *End,
unsigned AS,
394 "all pointers in a checking group must be in the same address space");
420void RuntimePointerChecking::groupChecks(
466 if (!UseDependencies) {
472 unsigned TotalComparisons = 0;
477 Iter.first->second.push_back(
Index);
506 auto PointerI = PositionMap.
find(
MI->getPointer());
508 "pointer in equivalence class not found in PositionMap");
509 for (
unsigned Pointer : PointerI->second) {
526 if (Group.addPointer(Pointer, *
this)) {
549 return (PtrToPartition[PtrIdx1] != -1 &&
550 PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
574 unsigned Depth)
const {
576 for (
const auto &
Check : Checks) {
577 const auto &First =
Check.first->Members, &Second =
Check.second->Members;
582 for (
unsigned K = 0; K < First.size(); ++K)
586 for (
unsigned K = 0; K < Second.size(); ++K)
601 OS.
indent(
Depth + 4) <<
"(Low: " << *CG.Low <<
" High: " << *CG.High
603 for (
unsigned J = 0; J < CG.Members.size(); ++J) {
616class AccessAnalysis {
625 : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE) {
627 BAA.enableCrossIterationMode();
634 Accesses[MemAccessInfo(
Ptr,
false)].insert(AccessTy);
636 ReadOnlyPtr.insert(
Ptr);
643 Accesses[MemAccessInfo(
Ptr,
true)].insert(AccessTy);
654 MemAccessInfo Access,
Type *AccessTy,
657 Loop *TheLoop,
unsigned &RunningDepId,
658 unsigned ASId,
bool ShouldCheckStride,
bool Assume);
667 Value *&UncomputablePtr,
bool ShouldCheckWrap =
false);
671 void buildDependenceSets() {
672 processMemAccesses();
680 bool isDependencyCheckNeeded() {
return !CheckDeps.empty(); }
688 MemAccessInfoList &getDependenciesToCheck() {
return CheckDeps; }
695 void processMemAccesses();
699 PtrAccessMap Accesses;
705 MemAccessInfoList CheckDeps;
731 bool IsRTCheckAnalysisNeeded =
false;
743 const SCEV *PtrScev,
Loop *L,
bool Assume) {
767 int64_t Stride =
getPtrStride(PSE, AccessTy,
Ptr, L, Strides).value_or(0);
780 while (!WorkList.
empty()) {
784 auto *PN = dyn_cast<PHINode>(
Ptr);
788 if (PN && InnermostLoop.
contains(PN->getParent()) &&
789 PN->getParent() != InnermostLoop.
getHeader()) {
790 for (
const Use &Inc : PN->incoming_values())
823 if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(
Ptr) ||
824 !isa<Instruction>(
Ptr) ||
Depth == 0) {
835 auto GetBinOpExpr = [&SE](
unsigned Opcode,
const SCEV *L,
const SCEV *R) {
837 case Instruction::Add:
839 case Instruction::Sub:
847 unsigned Opcode =
I->getOpcode();
849 case Instruction::GetElementPtr: {
851 Type *SourceTy =
GEP->getSourceElementType();
854 if (
I->getNumOperands() != 2 || SourceTy->
isVectorTy()) {
864 bool NeedsFreeze =
any_of(BaseScevs, UndefPoisonCheck) ||
865 any_of(OffsetScevs, UndefPoisonCheck);
870 if (OffsetScevs.
size() == 2 && BaseScevs.
size() == 1)
872 else if (BaseScevs.
size() == 2 && OffsetScevs.
size() == 1)
875 ScevList.emplace_back(Scev, NeedsFreeze);
893 ScevList.emplace_back(SE->
getAddExpr(get<0>(BaseScevs[0]), Scaled1),
895 ScevList.emplace_back(SE->
getAddExpr(get<0>(BaseScevs[1]), Scaled2),
899 case Instruction::Select: {
906 if (ChildScevs.
size() == 2) {
907 ScevList.push_back(ChildScevs[0]);
908 ScevList.push_back(ChildScevs[1]);
913 case Instruction::Add:
914 case Instruction::Sub: {
922 any_of(LScevs, UndefPoisonCheck) ||
any_of(RScevs, UndefPoisonCheck);
927 if (LScevs.
size() == 2 && RScevs.
size() == 1)
929 else if (RScevs.
size() == 2 && LScevs.
size() == 1)
932 ScevList.emplace_back(Scev, NeedsFreeze);
936 ScevList.emplace_back(
937 GetBinOpExpr(Opcode, get<0>(LScevs[0]), get<0>(RScevs[0])),
939 ScevList.emplace_back(
940 GetBinOpExpr(Opcode, get<0>(LScevs[1]), get<0>(RScevs[1])),
946 LLVM_DEBUG(
dbgs() <<
"ForkedPtr unhandled instruction: " << *
I <<
"\n");
963 if (Scevs.
size() == 2 &&
964 (isa<SCEVAddRecExpr>(get<0>(Scevs[0])) ||
966 (isa<SCEVAddRecExpr>(get<0>(Scevs[1])) ||
978 MemAccessInfo Access,
Type *AccessTy,
981 Loop *TheLoop,
unsigned &RunningDepId,
982 unsigned ASId,
bool ShouldCheckWrap,
989 for (
auto &
P : TranslatedPtrs) {
990 const SCEV *PtrExpr = get<0>(
P);
996 if (ShouldCheckWrap) {
998 if (TranslatedPtrs.size() > 1)
1001 if (!
isNoWrap(PSE, StridesMap,
Ptr, AccessTy, TheLoop)) {
1003 if (!Assume || !isa<SCEVAddRecExpr>(Expr))
1010 if (TranslatedPtrs.size() == 1)
1015 for (
auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
1019 if (isDependencyCheckNeeded()) {
1021 unsigned &LeaderId = DepSetId[Leader];
1023 LeaderId = RunningDepId++;
1027 DepId = RunningDepId++;
1029 bool IsWrite = Access.getInt();
1030 RtCheck.
insert(TheLoop,
Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
1041 Value *&UncomputablePtr,
bool ShouldCheckWrap) {
1044 bool CanDoRT =
true;
1046 bool MayNeedRTCheck =
false;
1047 if (!IsRTCheckAnalysisNeeded)
return true;
1049 bool IsDepCheckNeeded = isDependencyCheckNeeded();
1054 for (
auto &AS : AST) {
1055 int NumReadPtrChecks = 0;
1056 int NumWritePtrChecks = 0;
1057 bool CanDoAliasSetRT =
true;
1062 unsigned RunningDepId = 1;
1070 for (
const auto &
A : AS) {
1072 bool IsWrite = Accesses.count(MemAccessInfo(
Ptr,
true));
1075 ++NumWritePtrChecks;
1083 if (NumWritePtrChecks == 0 ||
1084 (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
1085 assert((AS.size() <= 1 ||
1088 MemAccessInfo AccessWrite(AC.getValue(),
true);
1089 return DepCands.
findValue(AccessWrite) == DepCands.
end();
1091 "Can only skip updating CanDoRT below, if all entries in AS "
1092 "are reads or there is at most 1 entry");
1096 for (
auto &Access : AccessInfos) {
1097 for (
const auto &AccessTy : Accesses[Access]) {
1098 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1099 DepSetId, TheLoop, RunningDepId, ASId,
1100 ShouldCheckWrap,
false)) {
1102 << *Access.getPointer() <<
'\n');
1104 CanDoAliasSetRT =
false;
1118 bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.
empty();
1122 if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1126 CanDoAliasSetRT =
true;
1127 for (
auto Retry : Retries) {
1128 MemAccessInfo Access = Retry.first;
1129 Type *AccessTy = Retry.second;
1130 if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1131 DepSetId, TheLoop, RunningDepId, ASId,
1132 ShouldCheckWrap,
true)) {
1133 CanDoAliasSetRT =
false;
1134 UncomputablePtr = Access.getPointer();
1140 CanDoRT &= CanDoAliasSetRT;
1141 MayNeedRTCheck |= NeedsAliasSetRTCheck;
1150 unsigned NumPointers = RtCheck.
Pointers.size();
1151 for (
unsigned i = 0; i < NumPointers; ++i) {
1152 for (
unsigned j = i + 1;
j < NumPointers; ++
j) {
1154 if (RtCheck.
Pointers[i].DependencySetId ==
1155 RtCheck.
Pointers[j].DependencySetId)
1168 dbgs() <<
"LAA: Runtime check would require comparison between"
1169 " different address spaces\n");
1175 if (MayNeedRTCheck && CanDoRT)
1179 <<
" pointer comparisons.\n");
1186 bool CanDoRTIfNeeded = !RtCheck.
Need || CanDoRT;
1187 if (!CanDoRTIfNeeded)
1189 return CanDoRTIfNeeded;
1192void AccessAnalysis::processMemAccesses() {
1199 LLVM_DEBUG(
dbgs() <<
"LAA: Accesses(" << Accesses.size() <<
"):\n");
1201 for (
auto A : Accesses)
1202 dbgs() <<
"\t" << *
A.first.getPointer() <<
" ("
1203 << (
A.first.getInt()
1205 : (ReadOnlyPtr.count(
A.first.getPointer()) ?
"read-only"
1214 for (
const auto &AS : AST) {
1219 bool SetHasWrite =
false;
1223 UnderlyingObjToAccessMap ObjToLastAccess;
1226 PtrAccessMap DeferredAccesses;
1230 for (
int SetIteration = 0; SetIteration < 2; ++SetIteration) {
1231 bool UseDeferred = SetIteration > 0;
1232 PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1234 for (
const auto &AV : AS) {
1239 for (
const auto &AC : S) {
1240 if (AC.first.getPointer() !=
Ptr)
1243 bool IsWrite = AC.first.getInt();
1247 bool IsReadOnlyPtr = ReadOnlyPtr.count(
Ptr) && !IsWrite;
1248 if (UseDeferred && !IsReadOnlyPtr)
1252 assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
1253 S.count(MemAccessInfo(
Ptr,
false))) &&
1254 "Alias-set pointer not in the access set?");
1256 MemAccessInfo Access(
Ptr, IsWrite);
1264 if (!UseDeferred && IsReadOnlyPtr) {
1267 DeferredAccesses.insert({Access, {}});
1275 if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
1276 CheckDeps.push_back(Access);
1277 IsRTCheckAnalysisNeeded =
true;
1286 ValueVector TempObjects;
1290 <<
"Underlying objects for pointer " << *
Ptr <<
"\n");
1291 for (
const Value *UnderlyingObj : TempObjects) {
1294 if (isa<ConstantPointerNull>(UnderlyingObj) &&
1300 UnderlyingObjToAccessMap::iterator Prev =
1301 ObjToLastAccess.find(UnderlyingObj);
1302 if (Prev != ObjToLastAccess.end())
1303 DepCands.
unionSets(Access, Prev->second);
1305 ObjToLastAccess[UnderlyingObj] = Access;
1316 return GEP->isInBounds();
1336 auto *
GEP = dyn_cast<GetElementPtrInst>(
Ptr);
1337 if (!
GEP || !
GEP->isInBounds())
1341 Value *NonConstIndex =
nullptr;
1343 if (!isa<ConstantInt>(
Index)) {
1346 NonConstIndex =
Index;
1354 if (
auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
1355 if (OBO->hasNoSignedWrap() &&
1358 isa<ConstantInt>(OBO->getOperand(1))) {
1359 auto *OpScev = PSE.
getSCEV(OBO->getOperand(0));
1361 if (
auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
1362 return OpAR->getLoop() == L && OpAR->getNoWrapFlags(
SCEV::FlagNSW);
1373 bool Assume,
bool ShouldCheckWrap) {
1377 if (isa<ScalableVectorType>(AccessTy)) {
1378 LLVM_DEBUG(
dbgs() <<
"LAA: Bad stride - Scalable object: " << *AccessTy
1380 return std::nullopt;
1391 <<
" SCEV: " << *PtrScev <<
"\n");
1392 return std::nullopt;
1397 LLVM_DEBUG(
dbgs() <<
"LAA: Bad stride - Not striding over innermost loop "
1398 << *
Ptr <<
" SCEV: " << *AR <<
"\n");
1399 return std::nullopt;
1411 bool IsNoWrapAddRec = !ShouldCheckWrap ||
1414 if (!IsNoWrapAddRec && !IsInBoundsGEP &&
1418 IsNoWrapAddRec =
true;
1419 LLVM_DEBUG(
dbgs() <<
"LAA: Pointer may wrap in the address space:\n"
1420 <<
"LAA: Pointer: " << *
Ptr <<
"\n"
1421 <<
"LAA: SCEV: " << *AR <<
"\n"
1422 <<
"LAA: Added an overflow assumption\n");
1425 dbgs() <<
"LAA: Bad stride - Pointer may wrap in the address space "
1426 << *
Ptr <<
" SCEV: " << *AR <<
"\n");
1427 return std::nullopt;
1438 <<
" SCEV: " << *AR <<
"\n");
1439 return std::nullopt;
1443 TypeSize AllocSize =
DL.getTypeAllocSize(AccessTy);
1445 const APInt &APStepVal =
C->getAPInt();
1449 return std::nullopt;
1454 int64_t Stride = StepVal /
Size;
1455 int64_t Rem = StepVal %
Size;
1457 return std::nullopt;
1462 if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
1467 LLVM_DEBUG(
dbgs() <<
"LAA: Non unit strided pointer which is not either "
1468 <<
"inbounds or in address space 0 may wrap:\n"
1469 <<
"LAA: Pointer: " << *
Ptr <<
"\n"
1470 <<
"LAA: SCEV: " << *AR <<
"\n"
1471 <<
"LAA: Added an overflow assumption\n");
1474 return std::nullopt;
1485 assert(PtrA && PtrB &&
"Expected non-nullptr pointers.");
1487 ->isOpaqueOrPointeeTypeMatches(ElemTyA) &&
"Wrong PtrA type");
1489 ->isOpaqueOrPointeeTypeMatches(ElemTyB) &&
"Wrong PtrB type");
1497 return std::nullopt;
1504 return std::nullopt;
1505 unsigned IdxWidth =
DL.getIndexSizeInBits(ASA);
1507 APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1512 if (PtrA1 == PtrB1) {
1515 ASA = cast<PointerType>(PtrA1->
getType())->getAddressSpace();
1516 ASB = cast<PointerType>(PtrB1->
getType())->getAddressSpace();
1519 return std::nullopt;
1521 IdxWidth =
DL.getIndexSizeInBits(ASA);
1522 OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1532 dyn_cast<SCEVConstant>(SE.
getMinusSCEV(PtrSCEVB, PtrSCEVA));
1534 return std::nullopt;
1535 Val = Diff->getAPInt().getSExtValue();
1537 int Size =
DL.getTypeStoreSize(ElemTyA);
1538 int Dist = Val /
Size;
1542 if (!StrictCheck || Dist *
Size == Val)
1544 return std::nullopt;
1551 VL, [](
const Value *V) {
return V->getType()->isPointerTy(); }) &&
1552 "Expected list of pointer operands.");
1555 Value *Ptr0 = VL[0];
1557 using DistOrdPair = std::pair<int64_t, int>;
1559 std::set<DistOrdPair,
decltype(Compare)> Offsets(Compare);
1560 Offsets.emplace(0, 0);
1562 bool IsConsecutive =
true;
1571 auto Res = Offsets.emplace(
Offset, Cnt);
1575 IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
1578 SortedIndices.
clear();
1579 if (!IsConsecutive) {
1583 for (
const std::pair<int64_t, int> &Pair : Offsets) {
1584 SortedIndices[Cnt] = Pair.second;
1600 std::optional<int> Diff =
1603 return Diff && *Diff == 1;
1609 Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1610 InstMap.push_back(SI);
1618 Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1619 InstMap.push_back(LI);
1646 case ForwardButPreventsForwarding:
1650 case BackwardVectorizable:
1652 case BackwardVectorizableButPreventsForwarding:
1665 case ForwardButPreventsForwarding:
1670 case BackwardVectorizable:
1672 case BackwardVectorizableButPreventsForwarding:
1678bool MemoryDepChecker::couldPreventStoreLoadForward(
uint64_t Distance,
1692 const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1694 uint64_t MaxVFWithoutSLForwardIssues = std::min(
1698 for (
uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1702 if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1703 MaxVFWithoutSLForwardIssues = (VF >> 1);
1708 if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1710 dbgs() <<
"LAA: Distance " << Distance
1711 <<
" that could cause a store-load forwarding conflict\n");
1715 if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
1716 MaxVFWithoutSLForwardIssues !=
1718 MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
1740 const SCEV &BackedgeTakenCount,
1761 const uint64_t ByteStride = Stride * TypeByteSize;
1765 const SCEV *CastedDist = &Dist;
1766 const SCEV *CastedProduct = Product;
1773 if (DistTypeSizeBits > ProductTypeSizeBits)
1801 assert(Stride > 1 &&
"The stride must be greater than 1");
1802 assert(TypeByteSize > 0 &&
"The type size in byte must be non-zero");
1803 assert(Distance > 0 &&
"The distance must be non-zero");
1806 if (Distance % TypeByteSize)
1809 uint64_t ScaledDist = Distance / TypeByteSize;
1827 return ScaledDist % Stride;
1831MemoryDepChecker::isDependent(
const MemAccessInfo &
A,
unsigned AIdx,
1834 assert (AIdx < BIdx &&
"Must pass arguments in program order");
1836 auto [APtr, AIsWrite] =
A;
1837 auto [BPtr, BIsWrite] =
B;
1842 if (!AIsWrite && !BIsWrite)
1846 if (APtr->getType()->getPointerAddressSpace() !=
1847 BPtr->getType()->getPointerAddressSpace())
1850 int64_t StrideAPtr =
1851 getPtrStride(PSE, ATy, APtr, InnermostLoop, Strides,
true).value_or(0);
1852 int64_t StrideBPtr =
1853 getPtrStride(PSE, BTy, BPtr, InnermostLoop, Strides,
true).value_or(0);
1860 if (StrideAPtr < 0) {
1872 LLVM_DEBUG(
dbgs() <<
"LAA: Src Scev: " << *Src <<
"Sink Scev: " << *Sink
1873 <<
"(Induction step: " << StrideAPtr <<
")\n");
1874 LLVM_DEBUG(
dbgs() <<
"LAA: Distance for " << *InstMap[AIdx] <<
" to "
1875 << *InstMap[BIdx] <<
": " << *Dist <<
"\n");
1880 if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr){
1881 LLVM_DEBUG(
dbgs() <<
"Pointer access with non-constant stride\n");
1886 uint64_t TypeByteSize =
DL.getTypeAllocSize(ATy);
1888 DL.getTypeStoreSizeInBits(ATy) ==
DL.getTypeStoreSizeInBits(BTy);
1889 uint64_t Stride = std::abs(StrideAPtr);
1891 if (!isa<SCEVCouldNotCompute>(Dist) && HasSameSize &&
1893 Stride, TypeByteSize))
1898 LLVM_DEBUG(
dbgs() <<
"LAA: Dependence because of non-constant distance\n");
1899 FoundNonConstantDistanceDependence =
true;
1903 const APInt &Val =
C->getAPInt();
1907 if (std::abs(Distance) > 0 && Stride > 1 && HasSameSize &&
1915 bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
1917 (couldPreventStoreLoadForward(Val.
abs().
getZExtValue(), TypeByteSize) ||
1919 LLVM_DEBUG(
dbgs() <<
"LAA: Forward but may prevent st->ld forwarding\n");
1932 dbgs() <<
"LAA: Zero dependence difference but different type sizes\n");
1939 LLVM_DEBUG(
dbgs() <<
"LAA: ReadWrite-Write positive dependency with "
1940 "different type sizes\n");
1950 unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
1979 TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
1980 if (MinDistanceNeeded >
static_cast<uint64_t>(Distance)) {
1981 LLVM_DEBUG(
dbgs() <<
"LAA: Failure because of positive distance "
1982 << Distance <<
'\n');
1987 if (MinDistanceNeeded > MaxSafeDepDistBytes) {
1989 << MinDistanceNeeded <<
" size in bytes\n");
2009 MaxSafeDepDistBytes =
2010 std::min(
static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
2012 bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2014 couldPreventStoreLoadForward(Distance, TypeByteSize))
2017 uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
2019 <<
" with max VF = " << MaxVF <<
'\n');
2020 uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2021 MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2029 MaxSafeDepDistBytes = -1;
2032 if (Visited.
count(CurAccess))
2048 bool AIIsWrite = AI->getInt();
2052 (AIIsWrite ? AI : std::next(AI));
2055 for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
2056 I1E = Accesses[*AI].
end(); I1 != I1E; ++I1)
2059 for (std::vector<unsigned>::iterator
2060 I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
2061 I2E = (OI == AI ? I1E : Accesses[*OI].end());
2063 auto A = std::make_pair(&*AI, *I1);
2064 auto B = std::make_pair(&*OI, *I2);
2071 isDependent(*
A.first,
A.second, *
B.first,
B.second, Strides);
2078 if (RecordDependences) {
2083 RecordDependences =
false;
2084 Dependences.clear();
2086 <<
"Too many dependences, stopped recording\n");
2098 LLVM_DEBUG(
dbgs() <<
"Total Dependences: " << Dependences.size() <<
"\n");
2105 auto &IndexVector = Accesses.find(Access)->second;
2109 std::back_inserter(Insts),
2110 [&](
unsigned Idx) {
return this->InstMap[
Idx]; });
2115 "NoDep",
"Unknown",
"Forward",
"ForwardButPreventsForwarding",
"Backward",
2116 "BackwardVectorizable",
"BackwardVectorizableButPreventsForwarding"};
2126bool LoopAccessInfo::canAnalyzeLoop() {
2135 recordAnalysis(
"NotInnerMostLoop") <<
"loop is not the innermost loop";
2142 dbgs() <<
"LAA: loop control flow is not understood by analyzer\n");
2143 recordAnalysis(
"CFGNotUnderstood")
2144 <<
"loop control flow is not understood by analyzer";
2150 if (isa<SCEVCouldNotCompute>(ExitCount)) {
2151 recordAnalysis(
"CantComputeNumberOfIterations")
2152 <<
"could not determine number of loop iterations";
2153 LLVM_DEBUG(
dbgs() <<
"LAA: SCEV could not compute the loop exit count.\n");
2168 unsigned NumReads = 0;
2169 unsigned NumReadWrites = 0;
2171 bool HasComplexMemInst =
false;
2174 HasConvergentOp =
false;
2176 PtrRtChecking->Pointers.
clear();
2177 PtrRtChecking->Need =
false;
2181 const bool EnableMemAccessVersioningOfLoop =
2193 if (
auto *Call = dyn_cast<CallBase>(&
I)) {
2194 if (
Call->isConvergent())
2195 HasConvergentOp =
true;
2200 if (HasComplexMemInst && HasConvergentOp) {
2206 if (HasComplexMemInst)
2212 if (
I.mayReadFromMemory()) {
2216 auto *
Call = dyn_cast<CallInst>(&
I);
2222 if (Call && !
Call->isNoBuiltin() &&
Call->getCalledFunction() &&
2226 auto *Ld = dyn_cast<LoadInst>(&
I);
2228 recordAnalysis(
"CantVectorizeInstruction", Ld)
2229 <<
"instruction cannot be vectorized";
2230 HasComplexMemInst =
true;
2233 if (!Ld->isSimple() && !IsAnnotatedParallel) {
2234 recordAnalysis(
"NonSimpleLoad", Ld)
2235 <<
"read with atomic ordering or volatile read";
2237 HasComplexMemInst =
true;
2242 DepChecker->addAccess(Ld);
2243 if (EnableMemAccessVersioningOfLoop)
2244 collectStridedAccess(Ld);
2249 if (
I.mayWriteToMemory()) {
2250 auto *St = dyn_cast<StoreInst>(&
I);
2252 recordAnalysis(
"CantVectorizeInstruction", St)
2253 <<
"instruction cannot be vectorized";
2254 HasComplexMemInst =
true;
2257 if (!St->isSimple() && !IsAnnotatedParallel) {
2258 recordAnalysis(
"NonSimpleStore", St)
2259 <<
"write with atomic ordering or volatile write";
2261 HasComplexMemInst =
true;
2266 DepChecker->addAccess(St);
2267 if (EnableMemAccessVersioningOfLoop)
2268 collectStridedAccess(St);
2273 if (HasComplexMemInst) {
2283 if (!Stores.
size()) {
2290 AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
2306 if (isUniform(
Ptr)) {
2308 StoresToInvariantAddresses.push_back(ST);
2309 HasDependenceInvolvingLoopInvariantAddress |=
2316 if (Seen.
insert({Ptr, AccessTy}).second) {
2323 if (blockNeedsPredication(
ST->getParent(), TheLoop, DT))
2327 [&Accesses, AccessTy, Loc](
Value *
Ptr) {
2328 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2329 Accesses.addStore(NewLoc, AccessTy);
2334 if (IsAnnotatedParallel) {
2336 dbgs() <<
"LAA: A loop annotated parallel, ignore memory dependency "
2352 bool IsReadOnlyPtr =
false;
2354 if (Seen.
insert({Ptr, AccessTy}).second ||
2355 !
getPtrStride(*PSE,
LD->getType(),
Ptr, TheLoop, SymbolicStrides).value_or(0)) {
2357 IsReadOnlyPtr =
true;
2363 LLVM_DEBUG(
dbgs() <<
"LAA: Found an unsafe dependency between a uniform "
2364 "load and uniform store to the same address!\n");
2365 HasDependenceInvolvingLoopInvariantAddress =
true;
2372 if (blockNeedsPredication(
LD->getParent(), TheLoop, DT))
2376 [&Accesses, AccessTy, Loc, IsReadOnlyPtr](
Value *
Ptr) {
2377 MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2378 Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2384 if (NumReadWrites == 1 && NumReads == 0) {
2392 Accesses.buildDependenceSets();
2396 Value *UncomputablePtr =
nullptr;
2397 bool CanDoRTIfNeeded =
2398 Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->
getSE(), TheLoop,
2399 SymbolicStrides, UncomputablePtr,
false);
2400 if (!CanDoRTIfNeeded) {
2401 auto *
I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2402 recordAnalysis(
"CantIdentifyArrayBounds",
I)
2403 <<
"cannot identify array bounds";
2404 LLVM_DEBUG(
dbgs() <<
"LAA: We can't vectorize because we can't find "
2405 <<
"the array bounds.\n");
2411 dbgs() <<
"LAA: May be able to perform a memory runtime check if needed.\n");
2414 if (Accesses.isDependencyCheckNeeded()) {
2416 CanVecMem = DepChecker->areDepsSafe(
2417 DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
2418 MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();
2420 if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
2424 Accesses.resetDepChecks(*DepChecker);
2426 PtrRtChecking->reset();
2427 PtrRtChecking->Need =
true;
2429 auto *SE = PSE->
getSE();
2430 UncomputablePtr =
nullptr;
2431 CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2432 *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr,
true);
2435 if (!CanDoRTIfNeeded) {
2436 auto *
I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2437 recordAnalysis(
"CantCheckMemDepsAtRunTime",
I)
2438 <<
"cannot check memory dependencies at runtime";
2439 LLVM_DEBUG(
dbgs() <<
"LAA: Can't vectorize with memory checks\n");
2448 if (HasConvergentOp) {
2449 recordAnalysis(
"CantInsertRuntimeCheckWithConvergent")
2450 <<
"cannot add control dependency to convergent operation";
2451 LLVM_DEBUG(
dbgs() <<
"LAA: We can't vectorize because a runtime check "
2452 "would be needed with a convergent operation\n");
2459 dbgs() <<
"LAA: No unsafe dependent memory operations in loop. We"
2460 << (PtrRtChecking->Need ?
"" :
" don't")
2461 <<
" need runtime memory checks.\n");
2463 emitUnsafeDependenceRemark();
2466void LoopAccessInfo::emitUnsafeDependenceRemark() {
2467 auto Deps = getDepChecker().getDependences();
2474 if (Found == Deps->end())
2478 LLVM_DEBUG(
dbgs() <<
"LAA: unsafe dependent memory operations in loop\n");
2483 <<
"unsafe dependent memory operations in loop. Use "
2484 "#pragma loop distribute(enable) to allow loop distribution "
2485 "to attempt to isolate the offending operations into a separate "
2494 R <<
"\nBackward loop carried data dependence.";
2497 R <<
"\nForward loop carried data dependence that prevents "
2498 "store-to-load forwarding.";
2501 R <<
"\nBackward loop carried data dependence that prevents "
2502 "store-to-load forwarding.";
2505 R <<
"\nUnknown data dependence.";
2512 SourceLoc = DD->getDebugLoc();
2514 R <<
" Memory location is the same as accessed at "
2515 <<
ore::NV(
"Location", SourceLoc);
2530 assert(!Report &&
"Multiple reports generated");
2536 CodeRegion =
I->getParent();
2539 if (
I->getDebugLoc())
2540 DL =
I->getDebugLoc();
2543 Report = std::make_unique<OptimizationRemarkAnalysis>(
DEBUG_TYPE, RemarkName,
DL,
2549 auto *SE = PSE->
getSE();
2559void LoopAccessInfo::collectStridedAccess(
Value *MemAccess) {
2568 LLVM_DEBUG(
dbgs() <<
"LAA: Found a strided access that is a candidate for "
2594 const SCEV *CastedStride = StrideExpr;
2595 const SCEV *CastedBECount = BETakenCount;
2597 if (BETypeSizeBits >= StrideTypeSizeBits)
2601 const SCEV *StrideMinusBETaken = SE->
getMinusSCEV(CastedStride, CastedBECount);
2607 dbgs() <<
"LAA: Stride>=TripCount; No point in versioning as the "
2608 "Stride==1 predicate will imply that the loop executes "
2612 LLVM_DEBUG(
dbgs() <<
"LAA: Found a strided access that we can version.\n");
2614 SymbolicStrides[
Ptr] = Stride;
2615 StrideSet.insert(Stride);
2622 PtrRtChecking(nullptr),
2624 PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
2625 if (canAnalyzeLoop()) {
2626 analyzeLoop(AA, LI, TLI, DT);
2633 if (MaxSafeDepDistBytes != -1ULL)
2634 OS <<
" with a maximum dependence distance of " << MaxSafeDepDistBytes
2636 if (PtrRtChecking->Need)
2637 OS <<
" with run-time checks";
2641 if (HasConvergentOp)
2647 if (
auto *Dependences = DepChecker->getDependences()) {
2649 for (
const auto &Dep : *Dependences) {
2650 Dep.
print(
OS,
Depth + 2, DepChecker->getMemoryInstructions());
2657 PtrRtChecking->print(
OS,
Depth);
2660 OS.
indent(
Depth) <<
"Non vectorizable stores to invariant address were "
2661 << (HasDependenceInvolvingLoopInvariantAddress ?
"" :
"not ")
2662 <<
"found in loop.\n";
2674 auto I = LoopAccessInfoMap.insert({&L,
nullptr});
2678 std::make_unique<LoopAccessInfo>(&L, &SE, TLI, &AA, &DT, &LI);
2680 return *
I.first->second;
2688 auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
2689 auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
2690 auto *TLI = TLIP ? &TLIP->getTLI(
F) :
nullptr;
2691 auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
2692 auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2693 auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
2694 LAIs = std::make_unique<LoopAccessInfoManager>(SE, AA, DT, LI, TLI);
2737#define LAA_NAME "loop-accesses"
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations on them.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
This file contains the declarations for the subclasses of Constant, which represent the different flavors of constant values and constant expressions.
Returns the sub-type a function will return at a given Idx; this should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
This file defines the DenseMap class.
This file builds on the ADT/GraphTraits.h file to build a generic depth-first graph iterator.
Generic implementation of equivalence classes through the use of Tarjan's efficient union-find algorithm.
static cl::opt< unsigned > MaxDependences("max-dependences", cl::Hidden, cl::desc("Maximum number of dependences collected by " "loop-access analysis (default = 100)"), cl::init(100))
We collect dependences up to this threshold.
static cl::opt< bool > EnableForwardingConflictDetection("store-to-load-forwarding-conflict-detection", cl::Hidden, cl::desc("Enable conflict detection in loop-access analysis"), cl::init(true))
Enable store-to-load forwarding conflict detection.
static void findForkedSCEVs(ScalarEvolution *SE, const Loop *L, Value *Ptr, SmallVectorImpl< PointerIntPair< const SCEV *, 1, bool > > &ScevList, unsigned Depth)
static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr, const SCEV *PtrScev, Loop *L, bool Assume)
Check whether a pointer can participate in a runtime bounds check.
static cl::opt< unsigned > MemoryCheckMergeThreshold("memory-check-merge-threshold", cl::Hidden, cl::desc("Maximum number of comparisons done when trying to merge " "runtime memory checks. (default = 100)"), cl::init(100))
The maximum iterations used to merge memory checks.
static SmallVector< PointerIntPair< const SCEV *, 1, bool > > findForkedPointer(PredicatedScalarEvolution &PSE, const ValueToValueMap &StridesMap, Value *Ptr, const Loop *L)
static cl::opt< unsigned, true > VectorizationInterleave("force-vector-interleave", cl::Hidden, cl::desc("Sets the vectorization interleave count. " "Zero is autoselect."), cl::location(VectorizerParams::VectorizationInterleave))
static cl::opt< unsigned, true > VectorizationFactor("force-vector-width", cl::Hidden, cl::desc("Sets the SIMD width. Zero is autoselect."), cl::location(VectorizerParams::VectorizationFactor))
static const char laa_name[]
static cl::opt< unsigned, true > RuntimeMemoryCheckThreshold("runtime-memory-check-threshold", cl::Hidden, cl::desc("When performing memory disambiguation checks at runtime do not " "generate more than this number of comparisons (default = 8)."), cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8))
static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, function_ref< void(Value *)> AddPointer)
static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, PredicatedScalarEvolution &PSE, const Loop *L)
Return true if an AddRec pointer Ptr is unsigned non-wrapping, i.e.
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE, const SCEV &BackedgeTakenCount, const SCEV &Dist, uint64_t Stride, uint64_t TypeByteSize)
Given a dependence-distance Dist between two memory accesses, that have the same stride whose absolut...
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride, uint64_t TypeByteSize)
Check the dependence for two accesses with the same stride Stride.
static bool isInBoundsGep(Value *Ptr)
static const SCEV * getMinFromExprs(const SCEV *I, const SCEV *J, ScalarEvolution *SE)
Compare I and J and return the minimum.
static bool isNoWrap(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, Type *AccessTy, Loop *L)
Check whether a pointer address cannot wrap.
static cl::opt< unsigned > MaxForkedSCEVDepth("max-forked-scev-depth", cl::Hidden, cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), cl::init(5))
static cl::opt< bool > EnableMemAccessVersioning("enable-mem-access-versioning", cl::init(true), cl::Hidden, cl::desc("Enable symbolic stride memory access versioning"))
This enables versioning on the strides of symbolically striding memory accesses in code like the foll...
This header provides classes for managing per-loop analyses.
This file provides utility analysis objects describing memory locations.
FunctionAnalysisManager FAM
This header defines various interfaces for pass management in LLVM.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file defines the PointerIntPair class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static LLVM_ATTRIBUTE_ALWAYS_INLINE bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, SDValue N, const TargetLowering *TLI, const DataLayout &DL)
This file implements a set that has insertion order iteration characteristics.
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
This file defines the SmallVector class.
static SymbolRef::Type getType(const Symbol *Sym)
static const X86InstrFMA3Group Groups[]
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Class for arbitrary precision integers.
uint64_t getZExtValue() const
Get zero extended value.
APInt abs() const
Get the absolute value.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool isNegative() const
Determine sign of this APInt.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
bool isStrictlyPositive() const
Determine if this APInt Value is positive.
int64_t getSExtValue() const
Get sign extended value.
This templated class represents "all analyses that operate over <a particular IR unit>" (e....
API to communicate dependencies between analyses during invalidation.
bool invalidate(IRUnitT &IR, const PreservedAnalyses &PA)
Trigger the invalidation of some other analysis pass if not already handled and return whether it was...
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
void setPreservesAll()
Set by analyses that do not transform their input at all.
AnalysisUsage & addRequiredTransitive()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
LLVM Basic Block Representation.
const Function * getParent() const
Return the enclosing method, or null if none.
const Module * getModule() const
Return the module owning the function this basic block belongs to, or nullptr if the function does no...
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
EquivalenceClasses - This represents a collection of equivalence classes and supports three efficient...
iterator findValue(const ElemTy &V) const
findValue - Return an iterator to the specified value.
iterator insert(const ElemTy &Data)
insert - Insert a new value into the union/find set, ignoring the request if the value already exists...
member_iterator member_end() const
typename std::set< ECValue, ECValueComparator >::const_iterator iterator
iterator* - Provides a way to iterate over all values in the set.
member_iterator member_begin(iterator I) const
member_iterator unionSets(const ElemTy &V1, const ElemTy &V2)
union - Merge the two equivalence sets for the specified values, inserting them if they do not alread...
const ElemTy & getLeaderValue(const ElemTy &V) const
getLeaderValue - Return the leader for the specified value that is in the set.
FunctionPass class - This class is used to implement most global optimizations.
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
PointerType * getType() const
Global values are always pointers.
Class to represent integer types.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
Value * getPointerOperand()
static constexpr LocationSize beforeOrAfterPointer()
Any location before or after the base pointer (but still within the underlying object).
This analysis provides dependence information for the memory accesses of a loop.
Result run(Function &F, FunctionAnalysisManager &AM)
bool invalidate(Function &F, const PreservedAnalyses &PA, FunctionAnalysisManager::Invalidator &Inv)
const LoopAccessInfo & getInfo(Loop &L)
Drive the analysis of memory accesses in the loop.
bool isUniform(Value *V) const
Returns true if the value V is uniform within the loop.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the information about the memory accesses in the loop.
LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AAResults *AA, DominatorTree *DT, LoopInfo *LI)
static bool blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, DominatorTree *DT)
Return true if the block BB needs to be predicated in order for the loop to be vectorized.
This analysis provides dependence information for the memory accesses of a loop.
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
LoopAccessLegacyAnalysis()
bool runOnFunction(Function &F) override
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
Analysis pass that exposes the LoopInfo for a function.
bool contains(const LoopT *L) const
Return true if the specified loop is contained within in this loop.
BlockT * getLoopLatch() const
If there is a single latch block for this loop, return it.
bool isInnermost() const
Return true if the loop does not contain any (natural) loops.
unsigned getNumBackEdges() const
Calculate the number of back edges to the loop header.
BlockT * getHeader() const
Wrapper class to LoopBlocksDFS that provides a standard begin()/end() interface for the DFS reverse p...
The legacy pass manager's analysis pass to compute loop information.
Represents a single loop in the control flow graph.
bool isAnnotatedParallel() const
Returns true if the loop is annotated parallel.
DebugLoc getStartLoc() const
Return the debug location of the start of this loop.
This class implements a map that also provides access to all stored values in a deterministic order.
Checks memory dependences among accesses to the same underlying object to determine whether there vec...
ArrayRef< unsigned > getOrderForAccess(Value *Ptr, bool IsWrite) const
Return the program order indices for the access location (Ptr, IsWrite).
const Loop * getInnermostLoop() const
bool isSafeForVectorization() const
No memory dependence was encountered that would inhibit vectorization.
SmallVector< Instruction *, 4 > getInstructionsForAccess(Value *Ptr, bool isWrite) const
Find the set of instructions that read or write via Ptr.
VectorizationSafetyStatus
Type to keep track of the status of the dependence check.
@ PossiblySafeWithRtChecks
void addAccess(StoreInst *SI)
Register the location (instructions are given increasing numbers) of a write access.
bool areDepsSafe(DepCandidates &AccessSets, MemAccessInfoList &CheckDeps, const ValueToValueMap &Strides)
Check whether the dependencies between the accesses are safe.
PointerIntPair< Value *, 1, bool > MemAccessInfo
Representation for a specific memory location.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
AAMDNodes AATags
The metadata nodes which describes the aliasing of the location (each member is null if that kind of ...
const Value * Ptr
The address of the start of the location.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
An interface layer with SCEV used to manage how we see SCEV expressions for values in the context of ...
void addPredicate(const SCEVPredicate &Pred)
Adds a new predicate.
ScalarEvolution * getSE() const
Returns the ScalarEvolution analysis used.
const SCEVPredicate & getPredicate() const
bool hasNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Returns true if we've proved that V doesn't wrap by means of a SCEV predicate.
void setNoOverflow(Value *V, SCEVWrapPredicate::IncrementWrapFlags Flags)
Proves that V doesn't overflow by adding SCEV predicate.
void print(raw_ostream &OS, unsigned Depth) const
Print the SCEV mappings done by the Predicated Scalar Evolution.
const SCEVAddRecExpr * getAsAddRec(Value *V)
Attempts to produce an AddRecExpr for V by adding additional SCEV predicates.
const SCEV * getBackedgeTakenCount()
Get the (predicated) backedge count for the analyzed loop.
const SCEV * getSCEV(Value *V)
Returns the SCEV expression of V, in the context of the current SCEV predicate.
A set of analyses that are preserved following a run of a transformation pass.
PreservedAnalysisChecker getChecker() const
Build a checker for this PreservedAnalyses and the specified analysis type.
Holds information about the memory runtime legality checks to verify that a group of pointers do not ...
bool Need
This flag indicates if we need to add the runtime check.
void reset()
Reset the state of the pointer runtime information.
unsigned getNumberOfChecks() const
Returns the number of run-time checks required according to needsChecking.
void printChecks(raw_ostream &OS, const SmallVectorImpl< RuntimePointerCheck > &Checks, unsigned Depth=0) const
Print Checks.
bool needsChecking(const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const
Decide if we need to add a check between two groups of pointers, according to needsChecking.
void print(raw_ostream &OS, unsigned Depth=0) const
Print the list run-time memory checks necessary.
SmallVector< RuntimeCheckingPtrGroup, 2 > CheckingGroups
Holds a partitioning of pointers into "check groups".
void generateChecks(MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies)
Generate the checks and store it.
friend struct RuntimeCheckingPtrGroup
static bool arePointersInSamePartition(const SmallVectorImpl< int > &PtrToPartition, unsigned PtrIdx1, unsigned PtrIdx2)
Check if pointers are in the same partition.
SmallVector< PointerInfo, 2 > Pointers
Information about the pointers that may require checking.
void insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, Type *AccessTy, bool WritePtr, unsigned DepSetId, unsigned ASId, PredicatedScalarEvolution &PSE, bool NeedsFreeze)
Insert a pointer and calculate the start and end SCEVs.
This node represents a polynomial recurrence on the trip count of the specified loop.
const SCEV * getStart() const
const SCEV * evaluateAtIteration(const SCEV *It, ScalarEvolution &SE) const
Return the value of this chain of recurrences at the specified iteration number.
const SCEV * getStepRecurrence(ScalarEvolution &SE) const
Constructs and returns the recurrence indicating how much this expression steps by.
bool isAffine() const
Return true if this represents an expression A + B*x where A and B are loop invariant values.
const Loop * getLoop() const
This class represents a constant integer value.
NoWrapFlags getNoWrapFlags(NoWrapFlags Mask=NoWrapMask) const
virtual void print(raw_ostream &OS, unsigned Depth=0) const =0
Prints a textual representation of this predicate with an indentation of Depth.
This class represents an analyzed expression in the program.
Type * getType() const
Return the LLVM type of this SCEV expression.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getNegativeSCEV(const SCEV *V, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap)
Return the SCEV object corresponding to -V.
const SCEV * getUMaxExpr(const SCEV *LHS, const SCEV *RHS)
const SCEVPredicate * getEqualPredicate(const SCEV *LHS, const SCEV *RHS)
const SCEV * getConstant(ConstantInt *V)
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
const SCEV * getNoopOrSignExtend(const SCEV *V, Type *Ty)
Return a SCEV corresponding to a conversion of the input value to the specified type.
const SCEV * getOne(Type *Ty)
Return a SCEV for the constant 1 of a specific type.
const SCEV * getPtrToIntExpr(const SCEV *Op, Type *Ty)
bool isLoopInvariant(const SCEV *S, const Loop *L)
Return true if the value of the given SCEV is unchanging in the specified loop.
bool isKnownPositive(const SCEV *S)
Test if the given expression is known to be positive.
const SCEV * getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth=0)
bool isSCEVable(Type *Ty) const
Test if values of the given type are analyzable within the SCEV framework.
Type * getEffectiveSCEVType(Type *Ty) const
Return a type with the same bitwidth as the given type and which represents how SCEV will treat the g...
const SCEV * getUMinExpr(const SCEV *LHS, const SCEV *RHS, bool Sequential=false)
const SCEV * getStoreSizeOfExpr(Type *IntTy, Type *StoreTy)
Return an expression for the store size of StoreTy that is type IntTy.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getMulExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical multiply expression, or something simpler if possible.
const SCEV * getSizeOfExpr(Type *IntTy, TypeSize Size)
Return an expression for a TypeSize.
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
const SCEV * getTruncateOrSignExtend(const SCEV *V, Type *Ty, unsigned Depth=0)
Return a SCEV corresponding to a conversion of the input value to the specified type.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
A Use represents the edge between a Value definition and its users.
static SmallVector< VFInfo, 8 > getMappings(const CallInst &CI)
Retrieve all the VFInfo instances associated to the CallInst CI.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
StringRef getName() const
Return a constant reference to the value's name.
constexpr ScalarTy getFixedValue() const
An efficient, type-erasing, non-owning reference to a callable.
This class implements an extremely fast bulk output stream that can only output to a stream.
raw_ostream & indent(unsigned NumSpaces)
indent - Insert 'NumSpaces' spaces.
friend const_iterator end(StringRef path)
Get end iterator over path.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
initializer< Ty > init(const Ty &Val)
LocationClass< Ty > location(Ty &L)
DiagnosticInfoOptimizationBase::Argument NV
This is an optimization pass for GlobalISel generic memory operations.
std::optional< int > getPointersDiff(Type *ElemTyA, Value *PtrA, Type *ElemTyB, Value *PtrB, const DataLayout &DL, ScalarEvolution &SE, bool StrictCheck=false, bool CheckType=true)
Returns the distance between the pointers PtrA and PtrB iff they are compatible and it is possible to...
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Intrinsic::ID getVectorIntrinsicIDForCall(const CallInst *CI, const TargetLibraryInfo *TLI)
Returns intrinsic ID for call.
std::optional< int64_t > getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, const Loop *Lp, const ValueToValueMap &StridesMap=ValueToValueMap(), bool Assume=false, bool ShouldCheckWrap=true)
If the pointer has a constant stride return it in units of the access type size.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
const SCEV * replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, Value *Ptr)
Return the SCEV corresponding to a pointer with the symbolic stride replaced with constant one,...
void initializeLoopAccessLegacyAnalysisPass(PassRegistry &)
const Value * getPointerOperand(const Value *V)
A helper function that returns the pointer operand of a load, store or GEP instruction.
Value * getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp)
Get the stride of a pointer access in a loop.
OutputIt transform(R &&Range, OutputIt d_first, UnaryFunction F)
Wrapper function around std::transform to apply a function to a range and store the result elsewhere.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Value * stripIntegerCast(Value *V)
bool NullPointerIsDefined(const Function *F, unsigned AS=0)
Check whether null pointer dereferencing is considered undefined behavior for a given function or an ...
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool sortPtrAccesses(ArrayRef< Value * > VL, Type *ElemTy, const DataLayout &DL, ScalarEvolution &SE, SmallVectorImpl< unsigned > &SortedIndices)
Attempt to sort the pointers in VL and return the sorted indices in SortedIndices,...
void getUnderlyingObjects(const Value *V, SmallVectorImpl< const Value * > &Objects, LoopInfo *LI=nullptr, unsigned MaxLookup=6)
This method is similar to getUnderlyingObject except that it can look through phi and select instruct...
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
bool isGuaranteedNotToBeUndefOrPoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Return true if this function can prove that V does not have undef bits and is never poison.
OutputIt copy(R &&Range, OutputIt Out)
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
MDNode * TBAA
The tag for type-based alias analysis.
A special type used by analysis passes to provide an address that identifies that particular analysis...
Dependence between memory access instructions.
DepType Type
The type of the dependence.
bool isPossiblyBackward() const
May be a lexically backward dependence type (includes Unknown).
bool isForward() const
Lexically forward dependence.
bool isBackward() const
Lexically backward dependence.
void print(raw_ostream &OS, unsigned Depth, const SmallVectorImpl< Instruction * > &Instrs) const
Print the dependence.
Instruction * getDestination(const LoopAccessInfo &LAI) const
Return the destination instruction of the dependence.
Instruction * getSource(const LoopAccessInfo &LAI) const
Return the source instruction of the dependence.
DepType
The type of the dependence.
@ BackwardVectorizableButPreventsForwarding
@ ForwardButPreventsForwarding
static const char * DepName[]
String version of the types.
static VectorizationSafetyStatus isSafeForVectorization(DepType Type)
Dependence types that don't prevent vectorization.
unsigned AddressSpace
Address space of the involved pointers.
bool addPointer(unsigned Index, RuntimePointerChecking &RtCheck)
Tries to add the pointer recorded in RtCheck at index Index to this pointer checking group.
bool NeedsFreeze
Whether the pointer needs to be frozen after expansion, e.g.
const SCEV * High
The SCEV expression which represents the upper bound of all the pointers in this group.
SmallVector< unsigned, 2 > Members
Indices of all the pointers that constitute this grouping.
RuntimeCheckingPtrGroup(unsigned Index, RuntimePointerChecking &RtCheck)
Create a new pointer checking group containing a single pointer, with index Index in RtCheck.
const SCEV * Low
The SCEV expression which represents the lower bound of all the pointers in this group.
bool IsWritePtr
Holds the information if this pointer is used for writing to memory.
unsigned DependencySetId
Holds the id of the set of pointers that could be dependent because of a shared underlying object.
unsigned AliasSetId
Holds the id of the disjoint alias set to which this pointer belongs.
static const unsigned MaxVectorWidth
Maximum SIMD width.
static unsigned VectorizationFactor
VF as overridden by the user.
static unsigned RuntimeMemoryCheckThreshold
When performing memory disambiguation checks at runtime do not make more than this number of compari...
static bool isInterleaveForced()
True if force-vector-interleave was specified by the user.
static unsigned VectorizationInterleave
Interleave factor as overridden by the user.
Function object to check whether the first component of a container supported by std::get (like std::...