116#include <type_traits>
122#define DEBUG_TYPE "load-store-vectorizer"
124STATISTIC(NumVectorInstructions,
"Number of vector accesses generated");
125STATISTIC(NumScalarsVectorized,
"Number of scalar accesses vectorized");
135 std::tuple<
const Value * ,
141 const EqClassKey &K) {
144 <<
" of element size " << ElementSize <<
" bits in addrspace "
161 APInt OffsetFromLeader;
165void sortChainInBBOrder(Chain &
C) {
166 sort(
C, [](
auto &
A,
auto &
B) {
return A.Inst->comesBefore(
B.Inst); });
169void sortChainInOffsetOrder(Chain &
C) {
170 sort(
C, [](
const auto &
A,
const auto &
B) {
171 if (
A.OffsetFromLeader !=
B.OffsetFromLeader)
172 return A.OffsetFromLeader.slt(
B.OffsetFromLeader);
173 return A.Inst->comesBefore(
B.Inst);
178 for (
const auto &
E :
C) {
179 dbgs() <<
" " << *
E.Inst <<
" (offset " <<
E.OffsetFromLeader <<
")\n";
183using EquivalenceClassMap =
187constexpr unsigned StackAdjustedAlignment = 4;
191 for (
const ChainElem &
E :
C)
197 const LoadInst *LI = dyn_cast<LoadInst>(
I);
198 return LI !=
nullptr && LI->
hasMetadata(LLVMContext::MD_invariant_load);
208 while (!Worklist.
empty()) {
211 for (
int i = 0; i < NumOperands; i++) {
213 if (!IM || IM->
getOpcode() == Instruction::PHI)
222 InstructionsToMove.
insert(IM);
229 for (
auto BBI =
I->getIterator(),
E =
I->getParent()->end(); BBI !=
E;) {
231 if (!InstructionsToMove.
count(IM))
256 :
F(
F), AA(AA), AC(AC), DT(DT), SE(SE),
TTI(
TTI),
262 static const unsigned MaxDepth = 3;
271 bool runOnEquivalenceClass(
const EqClassKey &EqClassKey,
277 bool runOnChain(Chain &
C);
282 std::vector<Chain> splitChainByContiguity(Chain &
C);
287 std::vector<Chain> splitChainByMayAliasInstrs(Chain &
C);
291 std::vector<Chain> splitChainByAlignment(Chain &
C);
295 bool vectorizeChain(Chain &
C);
298 std::optional<APInt> getConstantOffset(
Value *PtrA,
Value *PtrB,
301 std::optional<APInt> getConstantOffsetComplexAddrs(
Value *PtrA,
Value *PtrB,
304 std::optional<APInt> getConstantOffsetSelects(
Value *PtrA,
Value *PtrB,
311 Type *getChainElemTy(
const Chain &
C);
320 template <
bool IsLoadChain>
341class LoadStoreVectorizerLegacyPass :
public FunctionPass {
353 return "GPU Load and Store Vectorizer";
368char LoadStoreVectorizerLegacyPass::ID = 0;
371 "Vectorize load and Store instructions",
false,
false)
382 return new LoadStoreVectorizerLegacyPass();
385bool LoadStoreVectorizerLegacyPass::runOnFunction(
Function &
F) {
387 if (skipFunction(
F) ||
F.hasFnAttribute(Attribute::NoImplicitFloat))
390 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
391 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
392 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
394 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
397 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
399 return Vectorizer(
F, AA, AC, DT, SE,
TTI).run();
405 if (
F.hasFnAttribute(Attribute::NoImplicitFloat))
414 bool Changed = Vectorizer(
F, AA, AC, DT, SE,
TTI).
run();
420bool Vectorizer::run() {
421 bool Changed =
false;
447 for (
auto It = Barriers.
begin(),
End = std::prev(Barriers.
end()); It !=
End;
449 Changed |= runOnPseudoBB(*It, *std::next(It));
454 I->eraseFromParent();
466 dbgs() <<
"LSV: Running on pseudo-BB [" << *Begin <<
" ... ";
467 if (
End != Begin->getParent()->end())
470 dbgs() <<
"<BB end>";
474 bool Changed =
false;
475 for (
const auto &[EqClassKey, EqClass] :
476 collectEquivalenceClasses(Begin,
End))
477 Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
482bool Vectorizer::runOnEquivalenceClass(
const EqClassKey &EqClassKey,
484 bool Changed =
false;
487 dbgs() <<
"LSV: Running on equivalence class of size " << EqClass.
size()
488 <<
" keyed on " << EqClassKey <<
":\n";
490 dbgs() <<
" " << *
I <<
"\n";
493 std::vector<Chain> Chains = gatherChains(EqClass);
495 <<
" nontrivial chains.\n";);
496 for (Chain &
C : Chains)
497 Changed |= runOnChain(
C);
501bool Vectorizer::runOnChain(Chain &
C) {
503 dbgs() <<
"LSV: Running on chain with " <<
C.size() <<
" instructions:\n";
513 bool Changed =
false;
514 for (
auto &
C : splitChainByMayAliasInstrs(
C))
515 for (
auto &
C : splitChainByContiguity(
C))
516 for (
auto &
C : splitChainByAlignment(
C))
517 Changed |= vectorizeChain(
C);
521std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &
C) {
525 sortChainInBBOrder(
C);
528 dbgs() <<
"LSV: splitChainByMayAliasInstrs considering chain:\n";
536 for (
const auto &
E :
C)
537 ChainOffsets.insert({&*
E.Inst,
E.OffsetFromLeader});
550 auto Impl = [&](
auto IsLoad) {
552 auto [ChainBegin, ChainEnd] = [&](
auto IsLoad) {
553 if constexpr (IsLoad())
554 return std::make_pair(
C.begin(),
C.end());
556 return std::make_pair(
C.rbegin(),
C.rend());
558 assert(ChainBegin != ChainEnd);
560 std::vector<Chain> Chains;
563 for (
auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
564 if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.
front().Inst,
566 LLVM_DEBUG(
dbgs() <<
"LSV: No intervening may-alias instrs; can merge "
567 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst
572 dbgs() <<
"LSV: Found intervening may-alias instrs; cannot merge "
573 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst <<
"\n");
574 if (NewChain.
size() > 1) {
576 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
579 Chains.push_back(std::move(NewChain));
586 if (NewChain.
size() > 1) {
588 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
591 Chains.push_back(std::move(NewChain));
596 if (isa<LoadInst>(
C[0].Inst))
597 return Impl(std::bool_constant<true>());
599 assert(isa<StoreInst>(
C[0].Inst));
600 return Impl(std::bool_constant<false>());
603std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &
C) {
607 sortChainInOffsetOrder(
C);
610 dbgs() <<
"LSV: splitChainByContiguity considering chain:\n";
614 std::vector<Chain>
Ret;
615 Ret.push_back({
C.front()});
617 for (
auto It = std::next(
C.begin()),
End =
C.end(); It !=
End; ++It) {
619 auto &CurChain =
Ret.back();
620 const ChainElem &Prev = CurChain.back();
622 assert(SzBits % 8 == 0 &&
"Non-byte sizes should have been filtered out by "
623 "collectEquivalenceClass");
624 APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8;
627 bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
629 << (AreContiguous ?
"" :
"not ") <<
"contiguous: "
630 << *Prev.Inst <<
" (ends at offset " << PrevReadEnd
631 <<
") -> " << *It->Inst <<
" (starts at offset "
632 << It->OffsetFromLeader <<
")\n");
634 CurChain.push_back(*It);
636 Ret.push_back({*It});
640 llvm::erase_if(Ret, [](
const auto &Chain) {
return Chain.size() <= 1; });
644Type *Vectorizer::getChainElemTy(
const Chain &
C) {
657 if (
any_of(
C, [](
const ChainElem &
E) {
665 for (
const ChainElem &
E :
C)
671std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &
C) {
684 sortChainInOffsetOrder(
C);
687 dbgs() <<
"LSV: splitChainByAlignment considering chain:\n";
691 bool IsLoadChain = isa<LoadInst>(
C[0].Inst);
692 auto getVectorFactor = [&](
unsigned VF,
unsigned LoadStoreSize,
695 ChainSizeBytes, VecTy)
697 ChainSizeBytes, VecTy);
701 for (
const auto &
E :
C) {
704 "Should have filtered out non-power-of-two elements in "
705 "collectEquivalenceClasses.");
712 std::vector<Chain>
Ret;
713 for (
unsigned CBegin = 0; CBegin <
C.size(); ++CBegin) {
718 for (
unsigned CEnd = CBegin + 1,
Size =
C.size(); CEnd <
Size; ++CEnd) {
719 APInt Sz =
C[CEnd].OffsetFromLeader +
721 C[CBegin].OffsetFromLeader;
722 if (Sz.
sgt(VecRegBytes))
724 CandidateChains.push_back(
729 for (
auto It = CandidateChains.rbegin(),
End = CandidateChains.rend();
731 auto [CEnd, SizeBytes] = *It;
733 dbgs() <<
"LSV: splitChainByAlignment considering candidate chain ["
734 << *
C[CBegin].Inst <<
" ... " << *
C[CEnd].Inst <<
"]\n");
736 Type *VecElemTy = getChainElemTy(
C);
740 unsigned VecElemBits =
DL.getTypeSizeInBits(VecElemTy);
743 assert((8 * SizeBytes) % VecElemBits == 0);
744 unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
746 unsigned VF = 8 * VecRegBytes / VecElemBits;
749 unsigned TargetVF = getVectorFactor(VF, VecElemBits,
750 VecElemBits * NumVecElems / 8, VecTy);
751 if (TargetVF != VF && TargetVF < NumVecElems) {
753 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
755 << TargetVF <<
" != VF=" << VF
756 <<
" and TargetVF < NumVecElems=" << NumVecElems <<
"\n");
764 auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &
TTI =
TTI,
766 if (Alignment.value() % SizeBytes == 0)
768 unsigned VectorizedSpeed = 0;
770 F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
771 if (!AllowsMisaligned) {
773 <<
"LSV: Access of " << SizeBytes <<
"B in addrspace "
774 << AS <<
" with alignment " << Alignment.value()
775 <<
" is misaligned, and therefore can't be vectorized.\n");
779 unsigned ElementwiseSpeed = 0;
780 (
TTI).allowsMisalignedMemoryAccesses((
F).getContext(), VecElemBits, AS,
781 Alignment, &ElementwiseSpeed);
782 if (VectorizedSpeed < ElementwiseSpeed) {
784 <<
"LSV: Access of " << SizeBytes <<
"B in addrspace "
785 << AS <<
" with alignment " << Alignment.value()
786 <<
" has relative speed " << VectorizedSpeed
787 <<
", which is lower than the elementwise speed of "
789 <<
". Therefore this access won't be vectorized.\n");
805 bool IsAllocaAccess = AS ==
DL.getAllocaAddrSpace() &&
808 Align PrefAlign =
Align(StackAdjustedAlignment);
809 if (IsAllocaAccess && Alignment.
value() % SizeBytes != 0 &&
810 IsAllowedAndFast(PrefAlign)) {
812 PtrOperand, PrefAlign,
DL,
C[CBegin].Inst,
nullptr, &DT);
813 if (NewAlign >= Alignment) {
815 <<
"LSV: splitByChain upgrading alloca alignment from "
816 << Alignment.
value() <<
" to " << NewAlign.
value()
818 Alignment = NewAlign;
822 if (!IsAllowedAndFast(Alignment)) {
824 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
825 "because its alignment is not AllowedAndFast: "
826 << Alignment.
value() <<
"\n");
835 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
836 "because !isLegalToVectorizeLoad/StoreChain.");
841 Chain &NewChain =
Ret.emplace_back();
842 for (
unsigned I = CBegin;
I <= CEnd; ++
I)
843 NewChain.push_back(
C[
I]);
851bool Vectorizer::vectorizeChain(Chain &
C) {
855 sortChainInOffsetOrder(
C);
858 dbgs() <<
"LSV: Vectorizing chain of " <<
C.size() <<
" instructions:\n";
862 Type *VecElemTy = getChainElemTy(
C);
863 bool IsLoadChain = isa<LoadInst>(
C[0].Inst);
865 unsigned ChainBytes = std::accumulate(
866 C.begin(),
C.end(), 0u, [&](
unsigned Bytes,
const ChainElem &
E) {
867 return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst));
869 assert(ChainBytes %
DL.getTypeStoreSize(VecElemTy) == 0);
873 VecElemTy, 8 * ChainBytes /
DL.getTypeSizeInBits(VecElemTy));
878 if (AS ==
DL.getAllocaAddrSpace()) {
879 Alignment = std::max(
887 for (
const ChainElem &
E :
C)
889 DL.getTypeStoreSize(VecElemTy));
897 std::min_element(
C.begin(),
C.end(), [](
const auto &
A,
const auto &
B) {
898 return A.Inst->comesBefore(B.Inst);
903 VecInst =
Builder.CreateAlignedLoad(VecTy,
908 for (
const ChainElem &
E :
C) {
912 if (
auto *VT = dyn_cast<FixedVectorType>(
T)) {
913 auto Mask = llvm::to_vector<8>(
914 llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
915 V =
Builder.CreateShuffleVector(VecInst, Mask,
I->getName());
916 VecIdx += VT->getNumElements();
922 if (
V->getType() !=
I->getType())
923 V =
Builder.CreateBitOrPointerCast(V,
I->getType());
924 I->replaceAllUsesWith(V);
950 std::max_element(
C.begin(),
C.end(), [](
auto &
A,
auto &
B) {
951 return A.Inst->comesBefore(B.Inst);
957 auto InsertElem = [&](
Value *
V) {
958 if (
V->getType() != VecElemTy)
959 V =
Builder.CreateBitOrPointerCast(V, VecElemTy);
960 Vec =
Builder.CreateInsertElement(Vec, V,
Builder.getInt32(VecIdx++));
962 for (
const ChainElem &
E :
C) {
963 auto I = cast<StoreInst>(
E.Inst);
966 for (
int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
967 InsertElem(
Builder.CreateExtractElement(
I->getValueOperand(),
971 InsertElem(
I->getValueOperand());
977 VecInst =
Builder.CreateAlignedStore(
985 for (
const ChainElem &
E :
C)
986 ToErase.push_back(
E.Inst);
988 ++NumVectorInstructions;
989 NumScalarsVectorized +=
C.size();
993template <
bool IsLoadChain>
994bool Vectorizer::isSafeToMove(
998 << *ChainBegin <<
")\n");
1000 assert(isa<LoadInst>(ChainElem) == IsLoadChain);
1001 if (ChainElem == ChainBegin)
1006 if (isInvariantLoad(ChainElem))
1009 auto BBIt = std::next([&] {
1010 if constexpr (IsLoadChain)
1015 auto BBItEnd = std::next([&] {
1016 if constexpr (IsLoadChain)
1022 const APInt &ChainElemOffset = ChainOffsets.
at(ChainElem);
1023 const unsigned ChainElemSize =
1026 for (; BBIt != BBItEnd; ++BBIt) {
1029 if (!
I->mayReadOrWriteMemory())
1033 if (IsLoadChain && isa<LoadInst>(
I))
1037 if (!IsLoadChain && isInvariantLoad(
I))
1046 if (
auto OffsetIt = ChainOffsets.
find(
I); OffsetIt != ChainOffsets.
end()) {
1053 const APInt &IOffset = OffsetIt->second;
1055 if (IOffset == ChainElemOffset ||
1056 (IOffset.
sle(ChainElemOffset) &&
1057 (IOffset + IElemSize).sgt(ChainElemOffset)) ||
1058 (ChainElemOffset.
sle(IOffset) &&
1059 (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
1065 dbgs() <<
"LSV: Found alias in chain: " << *
I <<
"\n";
1077 <<
" Aliasing instruction:\n"
1078 <<
" " << *
I <<
'\n'
1079 <<
" Aliased instruction and pointer:\n"
1080 <<
" " << *ChainElem <<
'\n'
1098 unsigned MatchingOpIdxB,
bool Signed) {
1099 LLVM_DEBUG(
dbgs() <<
"LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
1100 <<
", AddOpA=" << *AddOpA <<
", MatchingOpIdxA="
1101 << MatchingOpIdxA <<
", AddOpB=" << *AddOpB
1102 <<
", MatchingOpIdxB=" << MatchingOpIdxB
1103 <<
", Signed=" <<
Signed <<
"\n");
1119 AddOpB->
getOpcode() == Instruction::Add &&
1123 Value *OtherOperandA = AddOpA->
getOperand(MatchingOpIdxA == 1 ? 0 : 1);
1124 Value *OtherOperandB = AddOpB->
getOperand(MatchingOpIdxB == 1 ? 0 : 1);
1125 Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
1126 Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
1128 if (OtherInstrB && OtherInstrB->
getOpcode() == Instruction::Add &&
1130 isa<ConstantInt>(OtherInstrB->
getOperand(1))) {
1132 cast<ConstantInt>(OtherInstrB->
getOperand(1))->getSExtValue();
1133 if (OtherInstrB->
getOperand(0) == OtherOperandA &&
1138 if (OtherInstrA && OtherInstrA->
getOpcode() == Instruction::Add &&
1140 isa<ConstantInt>(OtherInstrA->
getOperand(1))) {
1142 cast<ConstantInt>(OtherInstrA->
getOperand(1))->getSExtValue();
1143 if (OtherInstrA->
getOperand(0) == OtherOperandB &&
1149 if (OtherInstrA && OtherInstrB &&
1150 OtherInstrA->
getOpcode() == Instruction::Add &&
1151 OtherInstrB->
getOpcode() == Instruction::Add &&
1154 isa<ConstantInt>(OtherInstrA->
getOperand(1)) &&
1155 isa<ConstantInt>(OtherInstrB->
getOperand(1))) {
1157 cast<ConstantInt>(OtherInstrA->
getOperand(1))->getSExtValue();
1159 cast<ConstantInt>(OtherInstrB->
getOperand(1))->getSExtValue();
1168std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
1170 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
1171 <<
" PtrB=" << *PtrB <<
" ContextInst=" << *ContextInst
1172 <<
" Depth=" <<
Depth <<
"\n");
1173 auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
1174 auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
1176 return getConstantOffsetSelects(PtrA, PtrB, ContextInst,
Depth);
1180 if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
1181 GEPA->getPointerOperand() != GEPB->getPointerOperand())
1182 return std::nullopt;
1185 for (
unsigned I = 0,
E = GEPA->getNumIndices() - 1;
I <
E; ++
I) {
1187 return std::nullopt;
1196 return std::nullopt;
1201 if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
1202 return std::nullopt;
1204 bool Signed = isa<SExtInst>(OpA);
1208 OpB = dyn_cast<Instruction>(OpB->
getOperand(0));
1210 return std::nullopt;
1216 return std::nullopt;
1220 return std::nullopt;
1223 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
1231 if (OpB->
getOpcode() == Instruction::Add &&
1233 IdxDiff.
sle(cast<ConstantInt>(OpB->
getOperand(1))->getSExtValue()) &&
1239 OpA = dyn_cast<Instruction>(ValA);
1240 if (!Safe && OpA && OpA->
getOpcode() == Instruction::Add &&
1246 for (
unsigned MatchingOpIdxA : {0, 1})
1247 for (
unsigned MatchingOpIdxB : {0, 1})
1271 if (BitsAllowedToBeSet.
ult(IdxDiff.
abs()))
1272 return std::nullopt;
1277 return IdxDiff * Stride;
1278 return std::nullopt;
1281std::optional<APInt> Vectorizer::getConstantOffsetSelects(
1284 return std::nullopt;
1286 if (
auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
1287 if (
auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
1288 if (SelectA->getCondition() != SelectB->getCondition())
1289 return std::nullopt;
1290 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetSelects, PtrA=" << *PtrA
1291 <<
", PtrB=" << *PtrB <<
", ContextInst="
1292 << *ContextInst <<
", Depth=" <<
Depth <<
"\n");
1293 std::optional<APInt> TrueDiff = getConstantOffset(
1294 SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst,
Depth);
1295 if (!TrueDiff.has_value())
1296 return std::nullopt;
1297 std::optional<APInt> FalseDiff =
1298 getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
1299 ContextInst,
Depth);
1300 if (TrueDiff == FalseDiff)
1304 return std::nullopt;
1310 EquivalenceClassMap
Ret;
1314 if (
const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
1321 return Sel->getCondition();
1327 auto *LI = dyn_cast<LoadInst>(&
I);
1328 auto *
SI = dyn_cast<StoreInst>(&
I);
1332 if ((LI && !LI->
isSimple()) || (SI && !
SI->isSimple()))
1345 unsigned TySize =
DL.getTypeSizeInBits(Ty);
1346 if ((TySize % 8) != 0)
1357 unsigned AS =
Ptr->getType()->getPointerAddressSpace();
1360 unsigned VF = VecRegSize / TySize;
1361 VectorType *VecTy = dyn_cast<VectorType>(Ty);
1365 (VecTy && !
isPowerOf2_32(
DL.getTypeSizeInBits(VecTy->getScalarType()))))
1369 if (TySize > VecRegSize / 2 ||
1387 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
1391 for (
size_t I = 1;
I < Instrs.
size(); ++
I) {
1392 assert(Instrs[
I - 1]->comesBefore(Instrs[
I]));
1401 struct InstrListElem :
ilist_node<InstrListElem>,
1402 std::pair<Instruction *, Chain> {
1406 struct InstrListElemDenseMapInfo {
1409 static InstrListElem *getEmptyKey() {
return PtrInfo::getEmptyKey(); }
1410 static InstrListElem *getTombstoneKey() {
1411 return PtrInfo::getTombstoneKey();
1413 static unsigned getHashValue(
const InstrListElem *
E) {
1414 return IInfo::getHashValue(
E->first);
1416 static bool isEqual(
const InstrListElem *
A,
const InstrListElem *
B) {
1417 if (
A == getEmptyKey() ||
B == getEmptyKey())
1418 return A == getEmptyKey() &&
B == getEmptyKey();
1419 if (
A == getTombstoneKey() ||
B == getTombstoneKey())
1420 return A == getTombstoneKey() &&
B == getTombstoneKey();
1421 return IInfo::isEqual(
A->first,
B->first);
1432 constexpr int MaxChainsToTry = 64;
1434 bool MatchFound =
false;
1435 auto ChainIter = MRU.
begin();
1436 for (
size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.
end();
1438 std::optional<APInt>
Offset = getConstantOffset(
1442 (ChainIter->first->comesBefore(
I) ?
I : ChainIter->first));
1443 if (
Offset.has_value()) {
1446 ChainIter->second.push_back(ChainElem{
I,
Offset.value()});
1456 APInt ZeroOffset(ASPtrBits, 0);
1457 InstrListElem *
E =
new (
Allocator.Allocate()) InstrListElem(
I);
1458 E->second.push_back(ChainElem{
I, ZeroOffset});
1464 std::vector<Chain>
Ret;
1468 if (
E.second.size() > 1)
1469 Ret.push_back(std::move(
E.second));
1473std::optional<APInt> Vectorizer::getConstantOffset(
Value *PtrA,
Value *PtrB,
1477 <<
", PtrB=" << *PtrB <<
", ContextInst= " << *ContextInst
1478 <<
", Depth=" <<
Depth <<
"\n");
1481 unsigned OrigBitWidth =
DL.getIndexTypeSizeInBits(PtrA->
getType());
1482 APInt OffsetA(OrigBitWidth, 0);
1483 APInt OffsetB(OrigBitWidth, 0);
1486 unsigned NewPtrBitWidth =
DL.getTypeStoreSizeInBits(PtrA->
getType());
1487 if (NewPtrBitWidth !=
DL.getTypeStoreSizeInBits(PtrB->
getType()))
1488 return std::nullopt;
1493 assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
1494 OffsetB.getSignificantBits() <= NewPtrBitWidth);
1496 OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
1497 OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
1499 return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
1504 LLVM_DEBUG(
dbgs() <<
"LSV: SCEV PtrB - PtrA =" << *DistScev <<
"\n");
1510 return (OffsetB - OffsetA + Dist).
sextOrTrunc(OrigBitWidth);
1513 std::optional<APInt> Diff =
1514 getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst,
Depth);
1515 if (Diff.has_value())
1516 return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
1517 .sextOrTrunc(OrigBitWidth);
1518 return std::nullopt;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static const unsigned MaxDepth
Select target instructions out of generic instructions
static bool checkNoWrapFlags(Instruction *I, bool Signed)
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA, unsigned MatchingOpIdxA, Instruction *AddOpB, unsigned MatchingOpIdxB, bool Signed)
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, const MachineInstr *Insert, const WebAssemblyFunctionInfo &MFI, const MachineRegisterInfo &MRI)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
APInt zext(unsigned width) const
Zero extend to a new width.
APInt abs() const
Get the absolute value.
bool sgt(const APInt &RHS) const
Signed greater than comparison.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
bool sle(const APInt &RHS) const
Signed less or equal comparison.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
AssumptionCache run(Function &F, FunctionAnalysisManager &)
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::reverse_iterator reverse_iterator
InstListType::iterator iterator
Instruction iterators...
Represents analyses that only rely on functions' control flow.
This class represents a range of values.
const APInt * getSingleElement() const
If this set contains a single element, return it, otherwise return null.
bool isSingleElement() const
Return true if this set contains exactly one member.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Class to represent fixed width SIMD vectors.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Legacy wrapper pass to provide the GlobalsAAResult object.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
const BasicBlock * getParent() const
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
An instruction for reading from memory.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class implements a map that also provides access to all stored values in a deterministic order.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Legacy wrapper pass to provide the SCEVAAResult object.
This class represents an analyzed expression in the program.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getCouldNotCompute()
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
std::pair< iterator, bool > insert(const ValueT &V)
Value * getOperand() const
Type * getIndexedType() const
This class implements an extremely fast bulk output stream that can only output to a stream.
A simple intrusive list implementation.
void push_front(reference Node)
Insert a node at the front; never copies.
void remove(reference N)
Remove a node by reference; never deletes.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
iterator_range< po_iterator< T > > post_order(const T &G)
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry &)
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Implement std::hash so that hash_code can be used in STL containers.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.