44 #define DEBUG_TYPE "scalarize-masked-mem-intrin"
48 class ScalarizeMaskedMemIntrinLegacyPass :
public FunctionPass {
60 return "Scalarize Masked Memory Intrinsics";
81 "Scalarize unsupported masked memory intrinsics",
false,
90 return new ScalarizeMaskedMemIntrinLegacyPass();
98 unsigned NumElts = cast<FixedVectorType>(
Mask->getType())->getNumElements();
99 for (
unsigned i = 0;
i != NumElts; ++
i) {
101 if (!CElt || !isa<ConstantInt>(CElt))
110 return DL.isBigEndian() ? VectorWidth - 1 - Idx : Idx;
152 const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
161 Builder.SetInsertPoint(InsertPt);
165 if (isa<Constant>(
Mask) && cast<Constant>(
Mask)->isAllOnesValue()) {
173 const Align AdjustedAlignVal =
178 Value *FirstEltPtr =
Builder.CreateBitCast(Ptr, NewPtrType);
179 unsigned VectorWidth = cast<FixedVectorType>(
VecType)->getNumElements();
182 Value *VResult = Src0;
185 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
186 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
188 Value *Gep =
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
190 VResult =
Builder.CreateInsertElement(VResult,
Load, Idx);
200 if (VectorWidth != 1) {
202 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
205 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
214 if (VectorWidth != 1) {
218 Builder.getIntN(VectorWidth, 0));
234 CondBlock->
setName(
"cond.load");
237 Value *Gep =
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
245 IfBlock = NewIfBlock;
248 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
294 const Align AlignVal = cast<ConstantInt>(Alignment)->getAlignValue();
301 Builder.SetInsertPoint(InsertPt);
305 if (isa<Constant>(
Mask) && cast<Constant>(
Mask)->isAllOnesValue()) {
306 Builder.CreateAlignedStore(Src, Ptr, AlignVal);
312 const Align AdjustedAlignVal =
317 Value *FirstEltPtr =
Builder.CreateBitCast(Ptr, NewPtrType);
318 unsigned VectorWidth = cast<FixedVectorType>(
VecType)->getNumElements();
321 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
322 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
325 Value *Gep =
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
326 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
335 if (VectorWidth != 1) {
337 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
340 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
348 if (VectorWidth != 1) {
352 Builder.getIntN(VectorWidth, 0));
368 CondBlock->
setName(
"cond.store");
372 Value *Gep =
Builder.CreateConstInBoundsGEP1_32(EltTy, FirstEltPtr, Idx);
373 Builder.CreateAlignedStore(OneElt, Gep, AdjustedAlignVal);
379 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
428 Builder.SetInsertPoint(InsertPt);
429 MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
434 Value *VResult = Src0;
435 unsigned VectorWidth =
VecType->getNumElements();
439 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
440 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
444 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal,
"Load" +
Twine(Idx));
456 if (VectorWidth != 1) {
458 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
461 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
470 if (VectorWidth != 1) {
474 Builder.getIntN(VectorWidth, 0));
490 CondBlock->
setName(
"cond.load");
495 Builder.CreateAlignedLoad(EltTy, Ptr, AlignVal,
"Load" +
Twine(Idx));
503 IfBlock = NewIfBlock;
506 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
552 auto *SrcFVTy = cast<FixedVectorType>(Src->
getType());
555 isa<VectorType>(Ptrs->
getType()) &&
556 isa<PointerType>(cast<VectorType>(Ptrs->
getType())->getElementType()) &&
557 "Vector of pointers is expected in masked scatter intrinsic");
561 Builder.SetInsertPoint(InsertPt);
564 MaybeAlign AlignVal = cast<ConstantInt>(Alignment)->getMaybeAlignValue();
565 unsigned VectorWidth = SrcFVTy->getNumElements();
569 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
570 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
573 Builder.CreateExtractElement(Src, Idx,
"Elt" +
Twine(Idx));
575 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
584 if (VectorWidth != 1) {
586 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
589 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
597 if (VectorWidth != 1) {
601 Builder.getIntN(VectorWidth, 0));
617 CondBlock->
setName(
"cond.store");
622 Builder.CreateAlignedStore(OneElt, Ptr, AlignVal);
628 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
649 Builder.SetInsertPoint(InsertPt);
652 unsigned VectorWidth =
VecType->getNumElements();
655 Value *VResult = PassThru;
664 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
666 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue()) {
668 ShuffleMask[Idx] = Idx + VectorWidth;
672 InsertElt =
Builder.CreateAlignedLoad(EltTy, NewPtr,
Align(1),
673 "Load" +
Twine(Idx));
674 ShuffleMask[Idx] = Idx;
677 VResult =
Builder.CreateInsertElement(VResult, InsertElt, Idx,
680 VResult =
Builder.CreateShuffleVector(VResult, PassThru, ShuffleMask);
689 if (VectorWidth != 1) {
691 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
694 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
703 if (VectorWidth != 1) {
707 Builder.getIntN(VectorWidth, 0));
723 CondBlock->
setName(
"cond.load");
731 if ((Idx + 1) != VectorWidth)
732 NewPtr =
Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
738 IfBlock = NewIfBlock;
741 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
748 if ((Idx + 1) != VectorWidth) {
775 Builder.SetInsertPoint(InsertPt);
780 unsigned VectorWidth =
VecType->getNumElements();
785 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
786 if (cast<Constant>(
Mask)->getAggregateElement(Idx)->isNullValue())
789 Builder.CreateExtractElement(Src, Idx,
"Elt" +
Twine(Idx));
801 if (VectorWidth != 1) {
803 SclrMask =
Builder.CreateBitCast(
Mask, SclrMaskTy,
"scalar_mask");
806 for (
unsigned Idx = 0; Idx < VectorWidth; ++Idx) {
813 if (VectorWidth != 1) {
817 Builder.getIntN(VectorWidth, 0));
833 CondBlock->
setName(
"cond.store");
841 if ((Idx + 1) != VectorWidth)
842 NewPtr =
Builder.CreateConstInBoundsGEP1_32(EltTy, Ptr, 1);
848 IfBlock = NewIfBlock;
850 Builder.SetInsertPoint(NewIfBlock, NewIfBlock->
begin());
853 if ((Idx + 1) != VectorWidth) {
871 bool EverMadeChange =
false;
872 bool MadeChange =
true;
873 auto &
DL =
F.getParent()->getDataLayout();
878 bool ModifiedDTOnIteration =
false;
884 if (ModifiedDTOnIteration)
888 EverMadeChange |= MadeChange;
890 return EverMadeChange;
894 auto &
TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
F);
896 if (
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
897 DT = &DTWP->getDomTree();
916 bool MadeChange =
false;
919 while (CurInstIterator !=
BB.end()) {
920 if (
CallInst *CI = dyn_cast<CallInst>(&*CurInstIterator++))
935 if (isa<ScalableVectorType>(II->
getType()) ||
937 [](
Value *V) { return isa<ScalableVectorType>(V->getType()); }))
943 case Intrinsic::masked_load:
951 case Intrinsic::masked_store:
958 case Intrinsic::masked_gather: {
959 unsigned AlignmentInt =
963 DL.getValueOrABITypeAlignment(
MaybeAlign(AlignmentInt), LoadTy);
969 case Intrinsic::masked_scatter: {
970 unsigned AlignmentInt =
974 DL.getValueOrABITypeAlignment(
MaybeAlign(AlignmentInt), StoreTy);
980 case Intrinsic::masked_expandload:
985 case Intrinsic::masked_compressstore: