114#include <type_traits>
120#define DEBUG_TYPE "load-store-vectorizer"
122STATISTIC(NumVectorInstructions,
"Number of vector accesses generated");
123STATISTIC(NumScalarsVectorized,
"Number of scalar accesses vectorized");
133 std::tuple<
const Value * ,
139 const EqClassKey &K) {
142 <<
" of element size " << ElementSize <<
" bits in addrspace "
159 APInt OffsetFromLeader;
163void sortChainInBBOrder(Chain &
C) {
164 sort(
C, [](
auto &
A,
auto &
B) {
return A.Inst->comesBefore(
B.Inst); });
167void sortChainInOffsetOrder(Chain &
C) {
168 sort(
C, [](
const auto &
A,
const auto &
B) {
169 if (
A.OffsetFromLeader !=
B.OffsetFromLeader)
170 return A.OffsetFromLeader.slt(
B.OffsetFromLeader);
171 return A.Inst->comesBefore(
B.Inst);
176 for (
const auto &E :
C) {
177 dbgs() <<
" " << *E.Inst <<
" (offset " << E.OffsetFromLeader <<
")\n";
181using EquivalenceClassMap =
185constexpr unsigned StackAdjustedAlignment = 4;
189 for (
const ChainElem &E :
C)
195 const LoadInst *LI = dyn_cast<LoadInst>(
I);
196 return LI !=
nullptr && LI->
hasMetadata(LLVMContext::MD_invariant_load);
206 while (!Worklist.
empty()) {
209 for (
int i = 0; i < NumOperands; i++) {
211 if (!IM || IM->
getOpcode() == Instruction::PHI)
220 InstructionsToMove.
insert(IM);
227 for (
auto BBI =
I->getIterator(), E =
I->getParent()->end(); BBI != E;) {
229 if (!InstructionsToMove.
count(IM))
254 :
F(
F), AA(AA), AC(AC), DT(DT), SE(SE),
TTI(
TTI),
255 DL(
F.
getParent()->getDataLayout()), Builder(SE.getContext()) {}
260 static const unsigned MaxDepth = 3;
269 bool runOnEquivalenceClass(
const EqClassKey &EqClassKey,
275 bool runOnChain(Chain &
C);
280 std::vector<Chain> splitChainByContiguity(Chain &
C);
285 std::vector<Chain> splitChainByMayAliasInstrs(Chain &
C);
289 std::vector<Chain> splitChainByAlignment(Chain &
C);
293 bool vectorizeChain(Chain &
C);
296 std::optional<APInt> getConstantOffset(
Value *PtrA,
Value *PtrB,
299 std::optional<APInt> getConstantOffsetComplexAddrs(
Value *PtrA,
Value *PtrB,
302 std::optional<APInt> getConstantOffsetSelects(
Value *PtrA,
Value *PtrB,
309 Type *getChainElemTy(
const Chain &
C);
318 template <
bool IsLoadChain>
339class LoadStoreVectorizerLegacyPass :
public FunctionPass {
351 return "GPU Load and Store Vectorizer";
366char LoadStoreVectorizerLegacyPass::ID = 0;
369 "Vectorize load and Store instructions",
false,
false)
380 return new LoadStoreVectorizerLegacyPass();
383bool LoadStoreVectorizerLegacyPass::runOnFunction(
Function &
F) {
385 if (skipFunction(
F) ||
F.hasFnAttribute(Attribute::NoImplicitFloat))
388 AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
389 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
390 ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
392 getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
395 getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
397 return Vectorizer(
F, AA, AC, DT, SE,
TTI).run();
403 if (
F.hasFnAttribute(Attribute::NoImplicitFloat))
412 bool Changed = Vectorizer(
F, AA, AC, DT, SE,
TTI).
run();
418bool Vectorizer::run() {
419 bool Changed =
false;
445 for (
auto It = Barriers.
begin(),
End = std::prev(Barriers.
end()); It !=
End;
447 Changed |= runOnPseudoBB(*It, *std::next(It));
452 I->eraseFromParent();
464 dbgs() <<
"LSV: Running on pseudo-BB [" << *Begin <<
" ... ";
465 if (
End != Begin->getParent()->end())
468 dbgs() <<
"<BB end>";
472 bool Changed =
false;
473 for (
const auto &[EqClassKey, EqClass] :
474 collectEquivalenceClasses(Begin,
End))
475 Changed |= runOnEquivalenceClass(EqClassKey, EqClass);
480bool Vectorizer::runOnEquivalenceClass(
const EqClassKey &EqClassKey,
482 bool Changed =
false;
485 dbgs() <<
"LSV: Running on equivalence class of size " << EqClass.
size()
486 <<
" keyed on " << EqClassKey <<
":\n";
488 dbgs() <<
" " << *
I <<
"\n";
491 std::vector<Chain> Chains = gatherChains(EqClass);
493 <<
" nontrivial chains.\n";);
494 for (Chain &
C : Chains)
495 Changed |= runOnChain(
C);
499bool Vectorizer::runOnChain(Chain &
C) {
501 dbgs() <<
"LSV: Running on chain with " <<
C.size() <<
" instructions:\n";
511 bool Changed =
false;
512 for (
auto &
C : splitChainByMayAliasInstrs(
C))
513 for (
auto &
C : splitChainByContiguity(
C))
514 for (
auto &
C : splitChainByAlignment(
C))
515 Changed |= vectorizeChain(
C);
519std::vector<Chain> Vectorizer::splitChainByMayAliasInstrs(Chain &
C) {
523 sortChainInBBOrder(
C);
526 dbgs() <<
"LSV: splitChainByMayAliasInstrs considering chain:\n";
534 for (
const auto &E :
C)
535 ChainOffsets.insert({&*E.Inst, E.OffsetFromLeader});
548 auto Impl = [&](
auto IsLoad) {
550 auto [ChainBegin, ChainEnd] = [&](
auto IsLoad) {
551 if constexpr (IsLoad())
552 return std::make_pair(
C.begin(),
C.end());
554 return std::make_pair(
C.rbegin(),
C.rend());
556 assert(ChainBegin != ChainEnd);
558 std::vector<Chain> Chains;
561 for (
auto ChainIt = std::next(ChainBegin); ChainIt != ChainEnd; ++ChainIt) {
562 if (isSafeToMove<IsLoad>(ChainIt->Inst, NewChain.
front().Inst,
564 LLVM_DEBUG(
dbgs() <<
"LSV: No intervening may-alias instrs; can merge "
565 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst
570 dbgs() <<
"LSV: Found intervening may-alias instrs; cannot merge "
571 << *ChainIt->Inst <<
" into " << *ChainBegin->Inst <<
"\n");
572 if (NewChain.
size() > 1) {
574 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
577 Chains.push_back(std::move(NewChain));
584 if (NewChain.
size() > 1) {
586 dbgs() <<
"LSV: got nontrivial chain without aliasing instrs:\n";
589 Chains.push_back(std::move(NewChain));
594 if (isa<LoadInst>(
C[0].Inst))
595 return Impl(std::bool_constant<true>());
597 assert(isa<StoreInst>(
C[0].Inst));
598 return Impl(std::bool_constant<false>());
601std::vector<Chain> Vectorizer::splitChainByContiguity(Chain &
C) {
605 sortChainInOffsetOrder(
C);
608 dbgs() <<
"LSV: splitChainByContiguity considering chain:\n";
612 std::vector<Chain>
Ret;
613 Ret.push_back({
C.front()});
615 for (
auto It = std::next(
C.begin()),
End =
C.end(); It !=
End; ++It) {
617 auto &CurChain =
Ret.back();
618 const ChainElem &Prev = CurChain.back();
620 assert(SzBits % 8 == 0 &&
"Non-byte sizes should have been filtered out by "
621 "collectEquivalenceClass");
622 APInt PrevReadEnd = Prev.OffsetFromLeader + SzBits / 8;
625 bool AreContiguous = It->OffsetFromLeader == PrevReadEnd;
627 << (AreContiguous ?
"" :
"not ") <<
"contiguous: "
628 << *Prev.Inst <<
" (ends at offset " << PrevReadEnd
629 <<
") -> " << *It->Inst <<
" (starts at offset "
630 << It->OffsetFromLeader <<
")\n");
632 CurChain.push_back(*It);
634 Ret.push_back({*It});
638 llvm::erase_if(Ret, [](
const auto &Chain) {
return Chain.size() <= 1; });
642Type *Vectorizer::getChainElemTy(
const Chain &
C) {
655 if (
any_of(
C, [](
const ChainElem &E) {
663 for (
const ChainElem &E :
C)
669std::vector<Chain> Vectorizer::splitChainByAlignment(Chain &
C) {
682 sortChainInOffsetOrder(
C);
685 dbgs() <<
"LSV: splitChainByAlignment considering chain:\n";
689 bool IsLoadChain = isa<LoadInst>(
C[0].Inst);
690 auto getVectorFactor = [&](
unsigned VF,
unsigned LoadStoreSize,
693 ChainSizeBytes, VecTy)
695 ChainSizeBytes, VecTy);
699 for (
const auto &E :
C) {
702 "Should have filtered out non-power-of-two elements in "
703 "collectEquivalenceClasses.");
710 std::vector<Chain>
Ret;
711 for (
unsigned CBegin = 0; CBegin <
C.size(); ++CBegin) {
716 for (
unsigned CEnd = CBegin + 1,
Size =
C.size(); CEnd <
Size; ++CEnd) {
717 APInt Sz =
C[CEnd].OffsetFromLeader +
719 C[CBegin].OffsetFromLeader;
720 if (Sz.
sgt(VecRegBytes))
722 CandidateChains.push_back(
727 for (
auto It = CandidateChains.rbegin(),
End = CandidateChains.rend();
729 auto [CEnd, SizeBytes] = *It;
731 dbgs() <<
"LSV: splitChainByAlignment considering candidate chain ["
732 << *
C[CBegin].Inst <<
" ... " << *
C[CEnd].Inst <<
"]\n");
734 Type *VecElemTy = getChainElemTy(
C);
738 unsigned VecElemBits =
DL.getTypeSizeInBits(VecElemTy);
741 assert((8 * SizeBytes) % VecElemBits == 0);
742 unsigned NumVecElems = 8 * SizeBytes / VecElemBits;
744 unsigned VF = 8 * VecRegBytes / VecElemBits;
747 unsigned TargetVF = getVectorFactor(VF, VecElemBits,
748 VecElemBits * NumVecElems / 8, VecTy);
749 if (TargetVF != VF && TargetVF < NumVecElems) {
751 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
753 << TargetVF <<
" != VF=" << VF
754 <<
" and TargetVF < NumVecElems=" << NumVecElems <<
"\n");
762 auto IsAllowedAndFast = [&, SizeBytes = SizeBytes, &
TTI =
TTI,
764 if (Alignment.value() % SizeBytes == 0)
766 unsigned VectorizedSpeed = 0;
768 F.getContext(), SizeBytes * 8, AS, Alignment, &VectorizedSpeed);
769 if (!AllowsMisaligned) {
771 <<
"LSV: Access of " << SizeBytes <<
"B in addrspace "
772 << AS <<
" with alignment " << Alignment.value()
773 <<
" is misaligned, and therefore can't be vectorized.\n");
777 unsigned ElementwiseSpeed = 0;
778 (
TTI).allowsMisalignedMemoryAccesses((
F).getContext(), VecElemBits, AS,
779 Alignment, &ElementwiseSpeed);
780 if (VectorizedSpeed < ElementwiseSpeed) {
782 <<
"LSV: Access of " << SizeBytes <<
"B in addrspace "
783 << AS <<
" with alignment " << Alignment.value()
784 <<
" has relative speed " << VectorizedSpeed
785 <<
", which is lower than the elementwise speed of "
787 <<
". Therefore this access won't be vectorized.\n");
803 bool IsAllocaAccess = AS ==
DL.getAllocaAddrSpace() &&
806 Align PrefAlign =
Align(StackAdjustedAlignment);
807 if (IsAllocaAccess && Alignment.
value() % SizeBytes != 0 &&
808 IsAllowedAndFast(PrefAlign)) {
810 PtrOperand, PrefAlign,
DL,
C[CBegin].Inst,
nullptr, &DT);
811 if (NewAlign >= Alignment) {
813 <<
"LSV: splitByChain upgrading alloca alignment from "
814 << Alignment.
value() <<
" to " << NewAlign.
value()
816 Alignment = NewAlign;
820 if (!IsAllowedAndFast(Alignment)) {
822 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
823 "because its alignment is not AllowedAndFast: "
824 << Alignment.
value() <<
"\n");
833 dbgs() <<
"LSV: splitChainByAlignment discarding candidate chain "
834 "because !isLegalToVectorizeLoad/StoreChain.");
839 Chain &NewChain =
Ret.emplace_back();
840 for (
unsigned I = CBegin;
I <= CEnd; ++
I)
841 NewChain.push_back(
C[
I]);
849bool Vectorizer::vectorizeChain(Chain &
C) {
853 sortChainInOffsetOrder(
C);
856 dbgs() <<
"LSV: Vectorizing chain of " <<
C.size() <<
" instructions:\n";
860 Type *VecElemTy = getChainElemTy(
C);
861 bool IsLoadChain = isa<LoadInst>(
C[0].Inst);
863 unsigned ChainBytes = std::accumulate(
864 C.begin(),
C.end(), 0u, [&](
unsigned Bytes,
const ChainElem &E) {
865 return Bytes + DL.getTypeStoreSize(getLoadStoreType(E.Inst));
867 assert(ChainBytes %
DL.getTypeStoreSize(VecElemTy) == 0);
871 VecElemTy, 8 * ChainBytes /
DL.getTypeSizeInBits(VecElemTy));
876 if (AS ==
DL.getAllocaAddrSpace()) {
877 Alignment = std::max(
885 for (
const ChainElem &E :
C)
887 DL.getTypeStoreSize(VecElemTy));
894 Builder.SetInsertPoint(
896 return A.Inst->comesBefore(
B.Inst);
901 VecInst = Builder.CreateAlignedLoad(VecTy,
906 for (
const ChainElem &E :
C) {
910 if (
auto *VT = dyn_cast<FixedVectorType>(
T)) {
911 auto Mask = llvm::to_vector<8>(
912 llvm::seq<int>(VecIdx, VecIdx + VT->getNumElements()));
913 V = Builder.CreateShuffleVector(VecInst, Mask,
I->getName());
914 VecIdx += VT->getNumElements();
916 V = Builder.CreateExtractElement(VecInst, Builder.getInt32(VecIdx),
920 if (
V->getType() !=
I->getType())
921 V = Builder.CreateBitOrPointerCast(V,
I->getType());
922 I->replaceAllUsesWith(V);
948 return A.Inst->comesBefore(
B.Inst);
954 auto InsertElem = [&](
Value *
V) {
955 if (
V->getType() != VecElemTy)
956 V = Builder.CreateBitOrPointerCast(V, VecElemTy);
957 Vec = Builder.CreateInsertElement(Vec, V, Builder.getInt32(VecIdx++));
959 for (
const ChainElem &E :
C) {
960 auto I = cast<StoreInst>(E.Inst);
963 for (
int J = 0, JE = VT->getNumElements(); J < JE; ++J) {
964 InsertElem(Builder.CreateExtractElement(
I->getValueOperand(),
965 Builder.getInt32(J)));
968 InsertElem(
I->getValueOperand());
974 VecInst = Builder.CreateAlignedStore(
982 for (
const ChainElem &E :
C)
983 ToErase.push_back(E.Inst);
985 ++NumVectorInstructions;
986 NumScalarsVectorized +=
C.size();
990template <
bool IsLoadChain>
991bool Vectorizer::isSafeToMove(
995 << *ChainBegin <<
")\n");
997 assert(isa<LoadInst>(ChainElem) == IsLoadChain);
998 if (ChainElem == ChainBegin)
1003 if (isInvariantLoad(ChainElem))
1006 auto BBIt = std::next([&] {
1007 if constexpr (IsLoadChain)
1012 auto BBItEnd = std::next([&] {
1013 if constexpr (IsLoadChain)
1019 const APInt &ChainElemOffset = ChainOffsets.
at(ChainElem);
1020 const unsigned ChainElemSize =
1023 for (; BBIt != BBItEnd; ++BBIt) {
1026 if (!
I->mayReadOrWriteMemory())
1030 if (IsLoadChain && isa<LoadInst>(
I))
1034 if (!IsLoadChain && isInvariantLoad(
I))
1043 if (
auto OffsetIt = ChainOffsets.
find(
I); OffsetIt != ChainOffsets.
end()) {
1050 const APInt &IOffset = OffsetIt->second;
1052 if (IOffset == ChainElemOffset ||
1053 (IOffset.
sle(ChainElemOffset) &&
1054 (IOffset + IElemSize).sgt(ChainElemOffset)) ||
1055 (ChainElemOffset.
sle(IOffset) &&
1056 (ChainElemOffset + ChainElemSize).sgt(OffsetIt->second))) {
1062 dbgs() <<
"LSV: Found alias in chain: " << *
I <<
"\n";
1074 <<
" Aliasing instruction:\n"
1075 <<
" " << *
I <<
'\n'
1076 <<
" Aliased instruction and pointer:\n"
1077 <<
" " << *ChainElem <<
'\n'
1095 unsigned MatchingOpIdxB,
bool Signed) {
1096 LLVM_DEBUG(
dbgs() <<
"LSV: checkIfSafeAddSequence IdxDiff=" << IdxDiff
1097 <<
", AddOpA=" << *AddOpA <<
", MatchingOpIdxA="
1098 << MatchingOpIdxA <<
", AddOpB=" << *AddOpB
1099 <<
", MatchingOpIdxB=" << MatchingOpIdxB
1100 <<
", Signed=" <<
Signed <<
"\n");
1116 AddOpB->
getOpcode() == Instruction::Add &&
1120 Value *OtherOperandA = AddOpA->
getOperand(MatchingOpIdxA == 1 ? 0 : 1);
1121 Value *OtherOperandB = AddOpB->
getOperand(MatchingOpIdxB == 1 ? 0 : 1);
1122 Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
1123 Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
1125 if (OtherInstrB && OtherInstrB->
getOpcode() == Instruction::Add &&
1127 isa<ConstantInt>(OtherInstrB->
getOperand(1))) {
1129 cast<ConstantInt>(OtherInstrB->
getOperand(1))->getSExtValue();
1130 if (OtherInstrB->
getOperand(0) == OtherOperandA &&
1135 if (OtherInstrA && OtherInstrA->
getOpcode() == Instruction::Add &&
1137 isa<ConstantInt>(OtherInstrA->
getOperand(1))) {
1139 cast<ConstantInt>(OtherInstrA->
getOperand(1))->getSExtValue();
1140 if (OtherInstrA->
getOperand(0) == OtherOperandB &&
1146 if (OtherInstrA && OtherInstrB &&
1147 OtherInstrA->
getOpcode() == Instruction::Add &&
1148 OtherInstrB->
getOpcode() == Instruction::Add &&
1151 isa<ConstantInt>(OtherInstrA->
getOperand(1)) &&
1152 isa<ConstantInt>(OtherInstrB->
getOperand(1))) {
1154 cast<ConstantInt>(OtherInstrA->
getOperand(1))->getSExtValue();
1156 cast<ConstantInt>(OtherInstrB->
getOperand(1))->getSExtValue();
1165std::optional<APInt> Vectorizer::getConstantOffsetComplexAddrs(
1167 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs PtrA=" << *PtrA
1168 <<
" PtrB=" << *PtrB <<
" ContextInst=" << *ContextInst
1169 <<
" Depth=" <<
Depth <<
"\n");
1170 auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
1171 auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
1173 return getConstantOffsetSelects(PtrA, PtrB, ContextInst,
Depth);
1177 if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
1178 GEPA->getPointerOperand() != GEPB->getPointerOperand())
1179 return std::nullopt;
1182 for (
unsigned I = 0, E = GEPA->getNumIndices() - 1;
I < E; ++
I) {
1184 return std::nullopt;
1193 return std::nullopt;
1198 if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
1199 return std::nullopt;
1201 bool Signed = isa<SExtInst>(OpA);
1205 OpB = dyn_cast<Instruction>(OpB->
getOperand(0));
1207 return std::nullopt;
1213 return std::nullopt;
1217 return std::nullopt;
1220 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetComplexAddrs IdxDiff=" << IdxDiff
1228 if (OpB->
getOpcode() == Instruction::Add &&
1230 IdxDiff.
sle(cast<ConstantInt>(OpB->
getOperand(1))->getSExtValue()) &&
1236 OpA = dyn_cast<Instruction>(ValA);
1237 if (!Safe && OpA && OpA->
getOpcode() == Instruction::Add &&
1243 for (
unsigned MatchingOpIdxA : {0, 1})
1244 for (
unsigned MatchingOpIdxB : {0, 1})
1268 if (BitsAllowedToBeSet.
ult(IdxDiff.
abs()))
1269 return std::nullopt;
1274 return IdxDiff * Stride;
1275 return std::nullopt;
1278std::optional<APInt> Vectorizer::getConstantOffsetSelects(
1281 return std::nullopt;
1283 if (
auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
1284 if (
auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
1285 if (SelectA->getCondition() != SelectB->getCondition())
1286 return std::nullopt;
1287 LLVM_DEBUG(
dbgs() <<
"LSV: getConstantOffsetSelects, PtrA=" << *PtrA
1288 <<
", PtrB=" << *PtrB <<
", ContextInst="
1289 << *ContextInst <<
", Depth=" <<
Depth <<
"\n");
1290 std::optional<APInt> TrueDiff = getConstantOffset(
1291 SelectA->getTrueValue(), SelectB->getTrueValue(), ContextInst,
Depth);
1292 if (!TrueDiff.has_value())
1293 return std::nullopt;
1294 std::optional<APInt> FalseDiff =
1295 getConstantOffset(SelectA->getFalseValue(), SelectB->getFalseValue(),
1296 ContextInst,
Depth);
1297 if (TrueDiff == FalseDiff)
1301 return std::nullopt;
1307 EquivalenceClassMap
Ret;
1311 if (
const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
1318 return Sel->getCondition();
1324 auto *LI = dyn_cast<LoadInst>(&
I);
1325 auto *
SI = dyn_cast<StoreInst>(&
I);
1329 if ((LI && !LI->
isSimple()) || (SI && !
SI->isSimple()))
1342 unsigned TySize =
DL.getTypeSizeInBits(Ty);
1343 if ((TySize % 8) != 0)
1354 unsigned AS =
Ptr->getType()->getPointerAddressSpace();
1357 unsigned VF = VecRegSize / TySize;
1358 VectorType *VecTy = dyn_cast<VectorType>(Ty);
1362 (VecTy && !
isPowerOf2_32(
DL.getTypeSizeInBits(VecTy->getScalarType()))))
1366 if (TySize > VecRegSize / 2 ||
1384 unsigned ASPtrBits =
DL.getIndexSizeInBits(AS);
1388 for (
size_t I = 1;
I < Instrs.
size(); ++
I) {
1389 assert(Instrs[
I - 1]->comesBefore(Instrs[
I]));
1398 struct InstrListElem :
ilist_node<InstrListElem>,
1399 std::pair<Instruction *, Chain> {
1403 struct InstrListElemDenseMapInfo {
1406 static InstrListElem *getEmptyKey() {
return PtrInfo::getEmptyKey(); }
1407 static InstrListElem *getTombstoneKey() {
1408 return PtrInfo::getTombstoneKey();
1410 static unsigned getHashValue(
const InstrListElem *
E) {
1411 return IInfo::getHashValue(
E->first);
1413 static bool isEqual(
const InstrListElem *
A,
const InstrListElem *
B) {
1414 if (
A == getEmptyKey() ||
B == getEmptyKey())
1415 return A == getEmptyKey() &&
B == getEmptyKey();
1416 if (
A == getTombstoneKey() ||
B == getTombstoneKey())
1417 return A == getTombstoneKey() &&
B == getTombstoneKey();
1418 return IInfo::isEqual(
A->first,
B->first);
1429 constexpr int MaxChainsToTry = 64;
1431 bool MatchFound =
false;
1432 auto ChainIter = MRU.
begin();
1433 for (
size_t J = 0; J < MaxChainsToTry && ChainIter != MRU.
end();
1435 std::optional<APInt>
Offset = getConstantOffset(
1439 (ChainIter->first->comesBefore(
I) ?
I : ChainIter->first));
1440 if (
Offset.has_value()) {
1443 ChainIter->second.push_back(ChainElem{
I,
Offset.value()});
1453 APInt ZeroOffset(ASPtrBits, 0);
1454 InstrListElem *E =
new (
Allocator.Allocate()) InstrListElem(
I);
1455 E->second.push_back(ChainElem{
I, ZeroOffset});
1461 std::vector<Chain>
Ret;
1465 if (E.second.size() > 1)
1466 Ret.push_back(std::move(E.second));
1470std::optional<APInt> Vectorizer::getConstantOffset(
Value *PtrA,
Value *PtrB,
1474 <<
", PtrB=" << *PtrB <<
", ContextInst= " << *ContextInst
1475 <<
", Depth=" <<
Depth <<
"\n");
1478 unsigned OrigBitWidth =
DL.getIndexTypeSizeInBits(PtrA->
getType());
1479 APInt OffsetA(OrigBitWidth, 0);
1480 APInt OffsetB(OrigBitWidth, 0);
1483 unsigned NewPtrBitWidth =
DL.getTypeStoreSizeInBits(PtrA->
getType());
1484 if (NewPtrBitWidth !=
DL.getTypeStoreSizeInBits(PtrB->
getType()))
1485 return std::nullopt;
1490 assert(OffsetA.getSignificantBits() <= NewPtrBitWidth &&
1491 OffsetB.getSignificantBits() <= NewPtrBitWidth);
1493 OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
1494 OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
1496 return (OffsetB - OffsetA).sextOrTrunc(OrigBitWidth);
1501 LLVM_DEBUG(
dbgs() <<
"LSV: SCEV PtrB - PtrA =" << *DistScev <<
"\n");
1507 return (OffsetB - OffsetA + Dist).
sextOrTrunc(OrigBitWidth);
1510 std::optional<APInt> Diff =
1511 getConstantOffsetComplexAddrs(PtrA, PtrB, ContextInst,
Depth);
1512 if (Diff.has_value())
1513 return (OffsetB - OffsetA + Diff->sext(OffsetB.getBitWidth()))
1514 .sextOrTrunc(OrigBitWidth);
1515 return std::nullopt;
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
AMDGPU Mark last scratch load
This file implements a class to represent arbitrary precision integral constant values and operations...
Expand Atomic instructions
static bool isEqual(const Function &Caller, const Function &Callee)
This file contains the simple types necessary to represent the attributes associated with functions a...
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file defines the DenseMap class.
static const unsigned MaxDepth
static bool checkNoWrapFlags(Instruction *I, bool Signed)
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA, unsigned MatchingOpIdxA, Instruction *AddOpB, unsigned MatchingOpIdxB, bool Signed)
This file implements a map that provides insertion order iteration.
This file provides utility analysis objects describing memory locations.
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Provides some synthesis utilities to produce sequences of values.
This file defines the SmallPtrSet class.
This file defines the SmallVector class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static bool isSafeToMove(const MachineOperand *Def, const MachineOperand *Use, const MachineInstr *Insert, const WebAssemblyFunctionInfo &MFI, const MachineRegisterInfo &MRI)
A manager for alias analyses.
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Check whether or not an instruction may read or write the optionally specified memory location.
Class for arbitrary precision integers.
void clearBit(unsigned BitPosition)
Set a given bit to 0.
APInt zext(unsigned width) const
Zero extend to a new width.
APInt abs() const
Get the absolute value.
bool sgt(const APInt &RHS) const
Signed greater than comparison.
unsigned getBitWidth() const
Return the number of bits in the APInt.
bool ult(const APInt &RHS) const
Unsigned less than comparison.
bool sle(const APInt &RHS) const
Signed less or equal comparison.
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
uint64_t getLimitedValue(uint64_t Limit=UINT64_MAX) const
If this value is smaller than the specified limit, return it, otherwise return the limit value.
bool sge(const APInt &RHS) const
Signed greater or equal comparison.
int64_t getSExtValue() const
Get sign extended value.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesCFG()
This function should be called by the pass, iff they do not:
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
size_t size() const
size - Get the array size.
bool empty() const
empty - Check if the array is empty.
A function analysis which provides an AssumptionCache.
AssumptionCache run(Function &F, FunctionAnalysisManager &)
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::reverse_iterator reverse_iterator
InstListType::iterator iterator
Instruction iterators...
Represents analyses that only rely on functions' control flow.
This class represents a range of values.
const APInt * getSingleElement() const
If this set contains a single element, return it, otherwise return null.
bool isSingleElement() const
Return true if this set contains exactly one member.
A parsed version of the target data layout string in and methods for querying it.
iterator find(const_arg_type_t< KeyT > Val)
const ValueT & at(const_arg_type_t< KeyT > Val) const
at - Return the entry for the specified key, or abort if no such entry exists.
Implements a dense probed hash-table based set.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Class to represent fixed width SIMD vectors.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Legacy wrapper pass to provide the GlobalsAAResult object.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
bool hasNoUnsignedWrap() const LLVM_READONLY
Determine whether the no unsigned wrap flag is set.
bool hasNoSignedWrap() const LLVM_READONLY
Determine whether the no signed wrap flag is set.
bool hasMetadata() const
Return true if this instruction has any metadata attached to it.
const BasicBlock * getParent() const
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
An instruction for reading from memory.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class implements a map that also provides access to all stored values in a deterministic order.
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Pass interface - Implemented by all 'passes'.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserveSet()
Mark an analysis set as preserved.
Legacy wrapper pass to provide the SCEVAAResult object.
This class represents an analyzed expression in the program.
Analysis pass that exposes the ScalarEvolution for a function.
The main scalar evolution driver.
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
ConstantRange getSignedRange(const SCEV *S)
Determine the signed range for a particular SCEV.
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
const SCEV * getCouldNotCompute()
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
A BumpPtrAllocator that allows only elements of a specific type to be allocated.
StringRef - Represent a constant reference to a string, i.e.
Analysis pass providing the TargetTransformInfo.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isVectorTy() const
True if this is an instance of VectorType.
bool isPointerTy() const
True if this is an instance of PointerType.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
unsigned getNumOperands() const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to fals...
const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as a element type.
std::pair< iterator, bool > insert(const ValueT &V)
TypeSize getSequentialElementStride(const DataLayout &DL) const
Value * getOperand() const
This class implements an extremely fast bulk output stream that can only output to a stream.
A simple intrusive list implementation.
void push_front(reference Node)
Insert a node at the front; never copies.
void remove(reference N)
Remove a node by reference; never deletes.
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
This is an optimization pass for GlobalISel generic memory operations.
auto min_element(R &&Range)
bool RecursivelyDeleteTriviallyDeadInstructions(Value *V, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
If the specified value is a trivially dead instruction, delete it.
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
unsigned getLoadStoreAddressSpace(Value *I)
A helper function that returns the address space of the pointer operand of load or store instruction.
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value,...
iterator_range< po_iterator< T > > post_order(const T &G)
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
bool isModSet(const ModRefInfo MRI)
void sort(IteratorTy Start, IteratorTy End)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool isModOrRefSet(const ModRefInfo MRI)
Align getLoadStoreAlignment(Value *I)
A helper function that returns the alignment of load or store instruction.
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
auto max_element(R &&Range)
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
constexpr unsigned BitWidth
bool isGuaranteedToTransferExecutionToSuccessor(const Instruction *I)
Return true if this function can prove that the instruction I will always transfer execution to one o...
gep_type_iterator gep_type_begin(const User *GEP)
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry &)
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Implement std::hash so that hash_code can be used in STL containers.
This struct is a compact representation of a valid (non-zero power of two) alignment.
uint64_t value() const
This is a hole in the type system and should not be abused.
An information struct used to provide DenseMap with the various necessary components for a given valu...
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.