52#define DEBUG_TYPE "scalarizer"
56 cl::desc(
"Allow the scalarizer pass to scalarize "
57 "insertelement/extractelement with variable index"));
64 cl::desc(
"Allow the scalarizer pass to scalarize loads and store"));
73 cl::desc(
"Instruct the scalarizer pass to attempt to keep values of a "
74 "minimum number of bits"));
80 if (isa<PHINode>(Itr))
96using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;
107 unsigned NumPacked = 0;
111 unsigned NumFragments = 0;
114 Type *SplitTy =
nullptr;
118 Type *RemainderTy =
nullptr;
120 Type *getFragmentType(
unsigned I)
const {
121 return RemainderTy &&
I == NumFragments - 1 ? RemainderTy : SplitTy;
129 Scatterer() =
default;
135 const VectorSplit &VS, ValueVector *cachePtr =
nullptr);
138 Value *operator[](
unsigned I);
141 unsigned size()
const {
return VS.NumFragments; }
149 ValueVector *CachePtr;
156 FCmpSplitter(
FCmpInst &fci) : FCI(fci) {}
160 return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1,
Name);
169 ICmpSplitter(
ICmpInst &ici) : ICI(ici) {}
173 return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1,
Name);
181struct UnarySplitter {
193struct BinarySplitter {
198 return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1,
Name);
209 Align getFragmentAlign(
unsigned Frag) {
227 unsigned NumElements =
VS.VecTy->getNumElements();
231 if (
VS.NumPacked > 1) {
234 ExtendMask.
resize(NumElements, -1);
235 for (
unsigned I = 0;
I <
VS.NumPacked; ++
I)
238 InsertMask.
resize(NumElements);
239 for (
unsigned I = 0;
I < NumElements; ++
I)
244 for (
unsigned I = 0;
I <
VS.NumFragments; ++
I) {
245 Value *Fragment = Fragments[
I];
247 unsigned NumPacked =
VS.NumPacked;
248 if (
I ==
VS.NumFragments - 1 &&
VS.RemainderTy) {
249 if (
auto *RemVecTy = dyn_cast<FixedVectorType>(
VS.RemainderTy))
250 NumPacked = RemVecTy->getNumElements();
255 if (NumPacked == 1) {
256 Res =
Builder.CreateInsertElement(Res, Fragment,
I *
VS.NumPacked,
259 Fragment =
Builder.CreateShuffleVector(Fragment, Fragment, ExtendMask);
263 for (
unsigned J = 0; J < NumPacked; ++J)
264 InsertMask[
I *
VS.NumPacked + J] = NumElements + J;
265 Res =
Builder.CreateShuffleVector(Res, Fragment, InsertMask,
267 for (
unsigned J = 0; J < NumPacked; ++J)
268 InsertMask[
I *
VS.NumPacked + J] =
I *
VS.NumPacked + J;
277T getWithDefaultOverride(
const cl::opt<T> &ClOption,
278 const std::optional<T> &DefaultOverride) {
280 : DefaultOverride.value_or(ClOption);
283class ScalarizerVisitor :
public InstVisitor<ScalarizerVisitor, bool> {
285 ScalarizerVisitor(
unsigned ParallelLoopAccessMDKind,
DominatorTree *DT,
287 : ParallelLoopAccessMDKind(ParallelLoopAccessMDKind), DT(DT),
288 ScalarizeVariableInsertExtract(
290 Options.ScalarizeVariableInsertExtract)),
320 void gather(
Instruction *
Op,
const ValueVector &CV,
const VectorSplit &VS);
322 bool canTransferMetadata(
unsigned Kind);
323 void transferMetadataAndIRFlags(
Instruction *
Op,
const ValueVector &CV);
324 std::optional<VectorSplit> getVectorSplit(
Type *Ty);
325 std::optional<VectorLayout> getVectorLayout(
Type *Ty,
Align Alignment,
329 template<
typename T>
bool splitUnary(
Instruction &,
const T &);
330 template<
typename T>
bool splitBinary(
Instruction &,
const T &);
334 ScatterMap Scattered;
340 unsigned ParallelLoopAccessMDKind;
344 const bool ScalarizeVariableInsertExtract;
345 const bool ScalarizeLoadStore;
346 const unsigned ScalarizeMinBits;
367char ScalarizerLegacyPass::ID = 0;
369 "Scalarize vector operations",
false,
false)
375 const VectorSplit &VS, ValueVector *cachePtr)
376 : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
377 IsPointer = V->getType()->isPointerTy();
379 Tmp.resize(VS.NumFragments,
nullptr);
381 assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
383 "Inconsistent vector sizes");
384 if (VS.NumFragments > CachePtr->size())
385 CachePtr->resize(VS.NumFragments,
nullptr);
390Value *Scatterer::operator[](
unsigned Frag) {
391 ValueVector &CV = CachePtr ? *CachePtr : Tmp;
400 CV[Frag] =
Builder.CreateConstGEP1_32(
VS.SplitTy, V, Frag,
401 V->getName() +
".i" +
Twine(Frag));
405 Type *FragmentTy =
VS.getFragmentType(Frag);
407 if (
auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
409 for (
unsigned J = 0; J < VecTy->getNumElements(); ++J)
410 Mask.push_back(Frag *
VS.NumPacked + J);
413 V->getName() +
".i" +
Twine(Frag));
425 unsigned J =
Idx->getZExtValue();
427 if (Frag *
VS.NumPacked == J) {
428 CV[Frag] =
Insert->getOperand(1);
432 if (
VS.NumPacked == 1 && !CV[J]) {
436 CV[J] =
Insert->getOperand(1);
439 CV[Frag] =
Builder.CreateExtractElement(V, Frag *
VS.NumPacked,
440 V->getName() +
".i" +
Twine(Frag));
446bool ScalarizerLegacyPass::runOnFunction(
Function &
F) {
451 unsigned ParallelLoopAccessMDKind =
452 M.getContext().getMDKindID(
"llvm.mem.parallel_loop_access");
453 DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
455 return Impl.visit(
F);
459 return new ScalarizerLegacyPass();
462bool ScalarizerVisitor::visit(
Function &
F) {
463 assert(Gathered.empty() && Scattered.empty());
475 if (
Done &&
I->getType()->isVoidTy())
476 I->eraseFromParent();
485 const VectorSplit &VS) {
486 if (
Argument *VArg = dyn_cast<Argument>(V)) {
491 return Scatterer(BB, BB->
begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
508 &Scattered[{
V,
VS.SplitTy}]);
519void ScalarizerVisitor::gather(
Instruction *
Op,
const ValueVector &CV,
520 const VectorSplit &VS) {
521 transferMetadataAndIRFlags(
Op, CV);
525 ValueVector &SV = Scattered[{
Op,
VS.SplitTy}];
527 for (
unsigned I = 0,
E = SV.size();
I !=
E; ++
I) {
529 if (V ==
nullptr || SV[
I] == CV[
I])
533 if (isa<Instruction>(CV[
I]))
534 CV[
I]->takeName(Old);
536 PotentiallyDeadInstrs.emplace_back(Old);
540 Gathered.push_back(GatherList::value_type(
Op, &SV));
546 Op->replaceAllUsesWith(CV);
547 PotentiallyDeadInstrs.emplace_back(
Op);
554bool ScalarizerVisitor::canTransferMetadata(
unsigned Tag) {
555 return (
Tag == LLVMContext::MD_tbaa
556 ||
Tag == LLVMContext::MD_fpmath
557 ||
Tag == LLVMContext::MD_tbaa_struct
558 ||
Tag == LLVMContext::MD_invariant_load
559 ||
Tag == LLVMContext::MD_alias_scope
560 ||
Tag == LLVMContext::MD_noalias
561 ||
Tag == ParallelLoopAccessMDKind
562 ||
Tag == LLVMContext::MD_access_group);
567void ScalarizerVisitor::transferMetadataAndIRFlags(
Instruction *
Op,
568 const ValueVector &CV) {
570 Op->getAllMetadataOtherThanDebugLoc(MDs);
571 for (
unsigned I = 0,
E = CV.size();
I !=
E; ++
I) {
573 for (
const auto &MD : MDs)
574 if (canTransferMetadata(MD.first))
575 New->setMetadata(MD.first, MD.second);
576 New->copyIRFlags(
Op);
577 if (
Op->getDebugLoc() && !
New->getDebugLoc())
578 New->setDebugLoc(
Op->getDebugLoc());
584std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(
Type *Ty) {
586 Split.VecTy = dyn_cast<FixedVectorType>(Ty);
590 unsigned NumElems =
Split.VecTy->getNumElements();
591 Type *ElemTy =
Split.VecTy->getElementType();
596 Split.NumFragments = NumElems;
597 Split.SplitTy = ElemTy;
600 if (
Split.NumPacked >= NumElems)
606 unsigned RemainderElems = NumElems %
Split.NumPacked;
607 if (RemainderElems > 1)
609 else if (RemainderElems == 1)
610 Split.RemainderTy = ElemTy;
619std::optional<VectorLayout>
620ScalarizerVisitor::getVectorLayout(
Type *Ty,
Align Alignment,
622 std::optional<VectorSplit>
VS = getVectorSplit(Ty);
629 if (!
DL.typeSizeEqualsStoreSize(
VS->SplitTy) ||
630 (
VS->RemainderTy && !
DL.typeSizeEqualsStoreSize(
VS->RemainderTy)))
632 Layout.VecAlign = Alignment;
633 Layout.SplitSize =
DL.getTypeStoreSize(
VS->SplitTy);
639template<
typename Splitter>
640bool ScalarizerVisitor::splitUnary(
Instruction &
I,
const Splitter &Split) {
641 std::optional<VectorSplit>
VS = getVectorSplit(
I.getType());
645 std::optional<VectorSplit> OpVS;
646 if (
I.getOperand(0)->getType() ==
I.getType()) {
649 OpVS = getVectorSplit(
I.getOperand(0)->getType());
650 if (!OpVS ||
VS->NumPacked != OpVS->NumPacked)
655 Scatterer
Op = scatter(&
I,
I.getOperand(0), *OpVS);
656 assert(
Op.size() ==
VS->NumFragments &&
"Mismatched unary operation");
658 Res.resize(
VS->NumFragments);
659 for (
unsigned Frag = 0; Frag <
VS->NumFragments; ++Frag)
660 Res[Frag] =
Split(Builder,
Op[Frag],
I.getName() +
".i" +
Twine(Frag));
661 gather(&
I, Res, *VS);
667template<
typename Splitter>
668bool ScalarizerVisitor::splitBinary(
Instruction &
I,
const Splitter &Split) {
669 std::optional<VectorSplit>
VS = getVectorSplit(
I.getType());
673 std::optional<VectorSplit> OpVS;
674 if (
I.getOperand(0)->getType() ==
I.getType()) {
677 OpVS = getVectorSplit(
I.getOperand(0)->getType());
678 if (!OpVS ||
VS->NumPacked != OpVS->NumPacked)
683 Scatterer VOp0 = scatter(&
I,
I.getOperand(0), *OpVS);
684 Scatterer VOp1 = scatter(&
I,
I.getOperand(1), *OpVS);
685 assert(VOp0.size() ==
VS->NumFragments &&
"Mismatched binary operation");
686 assert(VOp1.size() ==
VS->NumFragments &&
"Mismatched binary operation");
688 Res.resize(
VS->NumFragments);
689 for (
unsigned Frag = 0; Frag <
VS->NumFragments; ++Frag) {
690 Value *Op0 = VOp0[Frag];
691 Value *Op1 = VOp1[Frag];
692 Res[Frag] =
Split(Builder, Op0, Op1,
I.getName() +
".i" +
Twine(Frag));
694 gather(&
I, Res, *VS);
704bool ScalarizerVisitor::splitCall(
CallInst &CI) {
705 std::optional<VectorSplit>
VS = getVectorSplit(CI.
getType());
720 ValueVector ScalarOperands(NumArgs);
731 for (
unsigned I = 0;
I != NumArgs; ++
I) {
733 if ([[maybe_unused]]
auto *OpVecTy =
734 dyn_cast<FixedVectorType>(OpI->
getType())) {
735 assert(OpVecTy->getNumElements() ==
VS->VecTy->getNumElements());
736 std::optional<VectorSplit> OpVS = getVectorSplit(OpI->
getType());
737 if (!OpVS || OpVS->NumPacked !=
VS->NumPacked) {
748 Scattered[
I] = scatter(&CI, OpI, *OpVS);
750 OverloadIdx[
I] = Tys.
size();
754 ScalarOperands[
I] = OpI;
760 ValueVector Res(
VS->NumFragments);
761 ValueVector ScalarCallOps(NumArgs);
767 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
768 bool IsRemainder =
I ==
VS->NumFragments - 1 &&
VS->RemainderTy;
769 ScalarCallOps.clear();
772 Tys[0] =
VS->RemainderTy;
774 for (
unsigned J = 0; J != NumArgs; ++J) {
776 ScalarCallOps.push_back(ScalarOperands[J]);
778 ScalarCallOps.push_back(Scattered[J][
I]);
779 if (IsRemainder && OverloadIdx[J] >= 0)
780 Tys[OverloadIdx[J]] = Scattered[J][
I]->getType();
787 Res[
I] =
Builder.CreateCall(NewIntrin, ScalarCallOps,
791 gather(&CI, Res, *VS);
795bool ScalarizerVisitor::visitSelectInst(
SelectInst &SI) {
796 std::optional<VectorSplit>
VS = getVectorSplit(
SI.getType());
800 std::optional<VectorSplit> CondVS;
801 if (isa<FixedVectorType>(
SI.getCondition()->getType())) {
802 CondVS = getVectorSplit(
SI.getCondition()->getType());
803 if (!CondVS || CondVS->NumPacked !=
VS->NumPacked) {
810 Scatterer VOp1 = scatter(&SI,
SI.getOperand(1), *VS);
811 Scatterer VOp2 = scatter(&SI,
SI.getOperand(2), *VS);
812 assert(VOp1.size() ==
VS->NumFragments &&
"Mismatched select");
813 assert(VOp2.size() ==
VS->NumFragments &&
"Mismatched select");
815 Res.resize(
VS->NumFragments);
818 Scatterer VOp0 = scatter(&SI,
SI.getOperand(0), *CondVS);
819 assert(VOp0.size() == CondVS->NumFragments &&
"Mismatched select");
820 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
824 Res[
I] =
Builder.CreateSelect(Op0, Op1, Op2,
829 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
832 Res[
I] =
Builder.CreateSelect(Op0, Op1, Op2,
836 gather(&SI, Res, *VS);
840bool ScalarizerVisitor::visitICmpInst(
ICmpInst &ICI) {
841 return splitBinary(ICI, ICmpSplitter(ICI));
844bool ScalarizerVisitor::visitFCmpInst(
FCmpInst &FCI) {
845 return splitBinary(FCI, FCmpSplitter(FCI));
848bool ScalarizerVisitor::visitUnaryOperator(
UnaryOperator &UO) {
849 return splitUnary(UO, UnarySplitter(UO));
853 return splitBinary(BO, BinarySplitter(BO));
857 std::optional<VectorSplit>
VS = getVectorSplit(GEPI.
getType());
868 for (
unsigned I = 0;
I < 1 + NumIndices; ++
I) {
871 std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
872 if (!OpVS || OpVS->NumPacked !=
VS->NumPacked) {
876 ScatterOps[
I] = scatter(&GEPI, GEPI.
getOperand(
I), *OpVS);
883 Res.resize(
VS->NumFragments);
884 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
886 SplitOps.
resize(1 + NumIndices);
887 for (
unsigned J = 0; J < 1 + NumIndices; ++J) {
889 SplitOps[J] = ScalarOps[J];
891 SplitOps[J] = ScatterOps[J][
I];
898 NewGEPI->setIsInBounds();
900 gather(&GEPI, Res, *VS);
904bool ScalarizerVisitor::visitCastInst(
CastInst &CI) {
905 std::optional<VectorSplit> DestVS = getVectorSplit(CI.
getDestTy());
909 std::optional<VectorSplit> SrcVS = getVectorSplit(CI.
getSrcTy());
910 if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
914 Scatterer Op0 = scatter(&CI, CI.
getOperand(0), *SrcVS);
915 assert(Op0.size() == SrcVS->NumFragments &&
"Mismatched cast");
917 Res.resize(DestVS->NumFragments);
918 for (
unsigned I = 0;
I < DestVS->NumFragments; ++
I)
922 gather(&CI, Res, *DestVS);
926bool ScalarizerVisitor::visitBitCastInst(
BitCastInst &BCI) {
927 std::optional<VectorSplit> DstVS = getVectorSplit(BCI.
getDestTy());
928 std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.
getSrcTy());
929 if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
932 const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();
935 assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));
938 Scatterer Op0 = scatter(&BCI, BCI.
getOperand(0), *SrcVS);
940 Res.resize(DstVS->NumFragments);
942 unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
943 unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();
945 if (isPointerTy || DstSplitBits == SrcSplitBits) {
946 assert(DstVS->NumFragments == SrcVS->NumFragments);
947 for (
unsigned I = 0;
I < DstVS->NumFragments; ++
I) {
948 Res[
I] =
Builder.CreateBitCast(Op0[
I], DstVS->getFragmentType(
I),
951 }
else if (SrcSplitBits % DstSplitBits == 0) {
955 MidVS.NumPacked = DstVS->NumPacked;
956 MidVS.NumFragments = SrcSplitBits / DstSplitBits;
958 MidVS.NumPacked * MidVS.NumFragments);
959 MidVS.SplitTy = DstVS->SplitTy;
962 for (
unsigned I = 0;
I < SrcVS->NumFragments; ++
I) {
968 while ((VI = dyn_cast<Instruction>(V)) &&
969 VI->getOpcode() == Instruction::BitCast)
970 V =
VI->getOperand(0);
972 V =
Builder.CreateBitCast(V, MidVS.VecTy,
V->getName() +
".cast");
974 Scatterer Mid = scatter(&BCI, V, MidVS);
975 for (
unsigned J = 0; J < MidVS.NumFragments; ++J)
976 Res[ResI++] = Mid[J];
978 }
else if (DstSplitBits % SrcSplitBits == 0) {
982 MidVS.NumFragments = DstSplitBits / SrcSplitBits;
983 MidVS.NumPacked = SrcVS->NumPacked;
985 MidVS.NumPacked * MidVS.NumFragments);
986 MidVS.SplitTy = SrcVS->SplitTy;
990 ConcatOps.
resize(MidVS.NumFragments);
991 for (
unsigned I = 0;
I < DstVS->NumFragments; ++
I) {
992 for (
unsigned J = 0; J < MidVS.NumFragments; ++J)
993 ConcatOps[J] = Op0[SrcI++];
994 Value *
V = concatenate(Builder, ConcatOps, MidVS,
996 Res[
I] =
Builder.CreateBitCast(V, DstVS->getFragmentType(
I),
1003 gather(&BCI, Res, *DstVS);
1008 std::optional<VectorSplit>
VS = getVectorSplit(IEI.
getType());
1013 Scatterer Op0 = scatter(&IEI, IEI.
getOperand(0), *VS);
1018 Res.resize(
VS->NumFragments);
1020 if (
auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
1021 unsigned Idx = CI->getZExtValue();
1022 unsigned Fragment =
Idx /
VS->NumPacked;
1023 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1024 if (
I == Fragment) {
1026 if (Fragment ==
VS->NumFragments - 1 &&
VS->RemainderTy &&
1027 !
VS->RemainderTy->isVectorTy())
1031 Builder.CreateInsertElement(Op0[
I], NewElt,
Idx %
VS->NumPacked);
1041 if (!ScalarizeVariableInsertExtract ||
VS->NumPacked > 1)
1044 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1045 Value *ShouldReplace =
1049 Res[
I] =
Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
1054 gather(&IEI, Res, *VS);
1064 Scatterer Op0 = scatter(&EEI, EEI.
getOperand(0), *VS);
1067 if (
auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
1068 unsigned Idx = CI->getZExtValue();
1069 unsigned Fragment =
Idx /
VS->NumPacked;
1070 Value *Res = Op0[Fragment];
1072 if (Fragment ==
VS->NumFragments - 1 &&
VS->RemainderTy &&
1073 !
VS->RemainderTy->isVectorTy())
1076 Res =
Builder.CreateExtractElement(Res,
Idx %
VS->NumPacked);
1077 replaceUses(&EEI, Res);
1082 if (!ScalarizeVariableInsertExtract ||
VS->NumPacked > 1)
1086 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1087 Value *ShouldExtract =
1091 Res =
Builder.CreateSelect(ShouldExtract, Elt, Res,
1094 replaceUses(&EEI, Res);
1099 std::optional<VectorSplit>
VS = getVectorSplit(SVI.
getType());
1100 std::optional<VectorSplit> VSOp =
1102 if (!VS || !VSOp ||
VS->NumPacked > 1 || VSOp->NumPacked > 1)
1105 Scatterer Op0 = scatter(&SVI, SVI.
getOperand(0), *VSOp);
1106 Scatterer Op1 = scatter(&SVI, SVI.
getOperand(1), *VSOp);
1108 Res.resize(
VS->NumFragments);
1110 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1114 else if (
unsigned(Selector) < Op0.size())
1115 Res[
I] = Op0[Selector];
1117 Res[
I] = Op1[Selector - Op0.size()];
1119 gather(&SVI, Res, *VS);
1123bool ScalarizerVisitor::visitPHINode(
PHINode &
PHI) {
1124 std::optional<VectorSplit>
VS = getVectorSplit(
PHI.getType());
1130 Res.resize(
VS->NumFragments);
1132 unsigned NumOps =
PHI.getNumOperands();
1133 for (
unsigned I = 0;
I <
VS->NumFragments; ++
I) {
1134 Res[
I] =
Builder.CreatePHI(
VS->getFragmentType(
I), NumOps,
1138 for (
unsigned I = 0;
I < NumOps; ++
I) {
1139 Scatterer
Op = scatter(&
PHI,
PHI.getIncomingValue(
I), *VS);
1141 for (
unsigned J = 0; J <
VS->NumFragments; ++J)
1142 cast<PHINode>(Res[J])->addIncoming(
Op[J], IncomingBlock);
1144 gather(&
PHI, Res, *VS);
1148bool ScalarizerVisitor::visitLoadInst(
LoadInst &LI) {
1149 if (!ScalarizeLoadStore)
1154 std::optional<VectorLayout> Layout = getVectorLayout(
1162 Res.resize(Layout->VS.NumFragments);
1164 for (
unsigned I = 0;
I < Layout->VS.NumFragments; ++
I) {
1165 Res[
I] =
Builder.CreateAlignedLoad(Layout->VS.getFragmentType(
I),
Ptr[
I],
1166 Align(Layout->getFragmentAlign(
I)),
1169 gather(&LI, Res, Layout->VS);
1173bool ScalarizerVisitor::visitStoreInst(
StoreInst &SI) {
1174 if (!ScalarizeLoadStore)
1179 Value *FullValue =
SI.getValueOperand();
1180 std::optional<VectorLayout> Layout = getVectorLayout(
1181 FullValue->
getType(),
SI.getAlign(),
SI.getModule()->getDataLayout());
1186 Scatterer VPtr = scatter(&SI,
SI.getPointerOperand(), Layout->VS);
1187 Scatterer VVal = scatter(&SI, FullValue, Layout->VS);
1190 Stores.resize(Layout->VS.NumFragments);
1191 for (
unsigned I = 0;
I < Layout->VS.NumFragments; ++
I) {
1195 Builder.CreateAlignedStore(Val,
Ptr, Layout->getFragmentAlign(
I));
1197 transferMetadataAndIRFlags(&SI, Stores);
1201bool ScalarizerVisitor::visitCallInst(
CallInst &CI) {
1202 return splitCall(CI);
1205bool ScalarizerVisitor::visitFreezeInst(
FreezeInst &FI) {
1213bool ScalarizerVisitor::finish() {
1216 if (Gathered.empty() && Scattered.empty() && !Scalarized)
1218 for (
const auto &GMI : Gathered) {
1220 ValueVector &CV = *GMI.second;
1221 if (!
Op->use_empty()) {
1225 if (
auto *Ty = dyn_cast<FixedVectorType>(
Op->getType())) {
1228 if (isa<PHINode>(
Op))
1231 VectorSplit
VS = *getVectorSplit(Ty);
1232 assert(
VS.NumFragments == CV.size());
1234 Res = concatenate(Builder, CV, VS,
Op->getName());
1238 assert(CV.size() == 1 &&
Op->getType() == CV[0]->getType());
1243 Op->replaceAllUsesWith(Res);
1245 PotentiallyDeadInstrs.emplace_back(
Op);
1258 unsigned ParallelLoopAccessMDKind =
1259 M.getContext().getMDKindID(
"llvm.mem.parallel_loop_access");
1261 ScalarizerVisitor Impl(ParallelLoopAccessMDKind, DT, Options);
1262 bool Changed = Impl.visit(
F);
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
Module.h This file contains the declarations for the Module class.
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
This file builds on the ADT/GraphTraits.h file to build a generic graph post order iterator.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool isTriviallyScalariable(Intrinsic::ID ID)
Scalarize vector operations
static cl::opt< bool > ClScalarizeVariableInsertExtract("scalarize-variable-insert-extract", cl::init(true), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize " "insertelement/extractelement with variable index"))
static cl::opt< bool > ClScalarizeLoadStore("scalarize-load-store", cl::init(false), cl::Hidden, cl::desc("Allow the scalarizer pass to scalarize loads and store"))
static cl::opt< unsigned > ClScalarizeMinBits("scalarize-min-bits", cl::init(0), cl::Hidden, cl::desc("Instruct the scalarizer pass to attempt to keep values of a " "minimum number of bits"))
This pass converts vector operations into scalar operations (or, optionally, operations on smaller ve...
This file defines the SmallVector class.
A container for analyses that lazily runs them and caches their results.
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
This class represents an incoming formal argument to a Function.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
LLVM Basic Block Representation.
iterator begin()
Instruction iterator methods.
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
InstListType::iterator iterator
Instruction iterators...
This class represents a no-op cast from one type to another.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
unsigned arg_size() const
This class represents a function call, abstracting a target machine's calling convention.
This is the base class for all instructions that perform data casts.
Type * getSrcTy() const
Return the source type, as a convenience.
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Type * getDestTy() const
Return the destination type, as a convenience.
This is the shared class of boolean and integer constants.
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Analysis pass which computes a DominatorTree.
Legacy analysis pass which computes a DominatorTree.
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
This instruction compares its operands according to the predicate given to the constructor.
Class to represent fixed width SIMD vectors.
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
This class represents a freeze function that returns random concrete value if an operand is either a ...
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Type * getSourceElementType() const
unsigned getNumIndices() const
This instruction compares its operands according to the predicate given to the constructor.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
This instruction inserts a single (scalar) element into a VectorType value.
VectorType * getType() const
Overload to return most specific vector type.
Base class for instruction visitors.
RetTy visitFreezeInst(FreezeInst &I)
RetTy visitFCmpInst(FCmpInst &I)
RetTy visitExtractElementInst(ExtractElementInst &I)
RetTy visitShuffleVectorInst(ShuffleVectorInst &I)
RetTy visitBitCastInst(BitCastInst &I)
void visit(Iterator Start, Iterator End)
RetTy visitPHINode(PHINode &I)
RetTy visitUnaryOperator(UnaryOperator &I)
RetTy visitStoreInst(StoreInst &I)
RetTy visitInsertElementInst(InsertElementInst &I)
RetTy visitBinaryOperator(BinaryOperator &I)
RetTy visitICmpInst(ICmpInst &I)
RetTy visitCallInst(CallInst &I)
RetTy visitCastInst(CastInst &I)
RetTy visitSelectInst(SelectInst &I)
RetTy visitGetElementPtrInst(GetElementPtrInst &I)
void visitInstruction(Instruction &I)
RetTy visitLoadInst(LoadInst &I)
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
const BasicBlock * getParent() const
An instruction for reading from memory.
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
A Module instance is used to store all the information related to an LLVM module.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
void preserve()
Mark an analysis as preserved.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
This class represents the LLVM 'select' instruction.
This instruction constructs a fixed permutation of two input vectors.
int getMaskValue(unsigned Elt) const
Return the shuffle mask value of this instruction for the given element index.
VectorType * getType() const
Overload to return most specific vector type.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
The instances of the Type class are immutable: once they are created, they are never changed.
bool isPointerTy() const
True if this is an instance of PointerType.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Value * getOperand(unsigned i) const
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
StringRef getName() const
Return a constant reference to the value's name.
void takeName(Value *V)
Transfer the name from V to this value.
int getNumOccurrences() const
self_iterator getIterator()
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
bool isVectorIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID, int OpdIdx)
Identifies if the vector form of the intrinsic is overloaded on the type of the operand at index OpdI...
BasicBlock::iterator skipDebugIntrinsics(BasicBlock::iterator It)
Advance It while it points to a debug instruction and return the result.
FunctionPass * createScalarizerPass()
Create a legacy pass manager instance of the Scalarizer pass.
DWARFExpression::Operation Op
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
bool isVectorIntrinsicWithScalarOpAtArg(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the vector form of the intrinsic has a scalar operand.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.
void initializeScalarizerLegacyPassPass(PassRegistry &)
This struct is a compact representation of a valid (non-zero power of two) alignment.