#include "llvm/IR/IntrinsicsHexagon.h"

#define DEBUG_TYPE "hexagon-vc"
class HexagonVectorCombine {
      : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
  Type *getByteTy(int ElemCount = 0) const;
  Type *getBoolTy(int ElemCount = 0) const;

  int getSizeOf(const Value *Val) const;
  int getSizeOf(const Type *Ty) const;
  int getAllocSizeOf(const Type *Ty) const;
  int getTypeAlignment(Type *Ty) const;

                 int Length, int Where) const;

  template <typename T = std::vector<Instruction *>>

  bool isByteVecTy(Type *Ty) const;
  bool isSectorTy(Type *Ty) const;
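
// The AlignVectors helper below collects loads and stores whose addresses
// differ from a common base by compile-time constants, moves each group
// next to its first member, and rewrites the group as fully aligned
// "sector" accesses plus byte-wise realignment (see realignGroup).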
  AlignVectors(HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  using InstList = std::vector<Instruction *>;

    AddrInfo(const AddrInfo &) = default;
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}

  using AddrList = std::vector<AddrInfo>;

      return A->comesBefore(B);

  using DepList = std::set<Instruction *, InstrLess>;

        : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}

  using MoveList = std::vector<MoveGroup>;
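
  // ByteSpan describes where the bytes of a set of values fall within a
  // contiguous range: each Block places a Segment (a [Start, Start+Size)
  // byte slice of some Value) at byte offset Pos within the span.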
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;

      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      Block(const Block &Blk) = default;

    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }

    std::vector<Block> Blocks;

    using iterator = decltype(Blocks)::iterator;
    iterator end() { return Blocks.end(); }
    using const_iterator = decltype(Blocks)::const_iterator;
    const_iterator begin() const { return Blocks.begin(); }
    const_iterator end() const { return Blocks.end(); }
  Align getAlignFromValue(const Value *V) const;
  bool isHvx(const AddrInfo &AI) const;

                              int Alignment) const;

  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool move(const MoveGroup &Move) const;
  bool realignGroup(const MoveGroup &Move) const;

  std::map<Instruction *, AddrList> AddrGroups;
  HexagonVectorCombine &HVC;
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;

    OS << "  " << *I << '\n';

    OS << "  " << *I << '\n';

  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS) {
    OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
       << *B.Seg.Val << '\n';
template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;

  return dyn_cast<T>(In);

  return getIfUnordered(dyn_cast<LoadInst>(In));

  return getIfUnordered(dyn_cast<StoreInst>(In));
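
// Pre-C++20 stand-in for std::erase_if on maps and other containers. The
// #if below guards a variadic-template version that some MSVC releases
// (before 19.26) apparently fail to compile, hence the two formulations.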
#if !defined(_MSC_VER) || _MSC_VER >= 1926
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)

template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)

  for (auto i = map.begin(), e = map.end(); i != e;) {

template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
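
// ByteSpan::extent() returns the number of bytes covered from the lowest
// block position to the end of the highest one; section() extracts the
// (clipped) blocks overlapping [Start, Start+Length).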
auto AlignVectors::ByteSpan::extent() const -> int {
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  for (const ByteSpan::Block &B : Blocks) {
    int R = std::min(B.Pos + B.Seg.Size, Start + Length);
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);

  for (Block &B : Blocks)

  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
  const auto *C = dyn_cast<ConstantInt>(V);
  assert(C && "Alignment must be a compile-time constant integer");
  return C->getAlignValue();
  if (auto *L = isCandidate<LoadInst>(&In))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
  if (auto *S = isCandidate<StoreInst>(&In))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      getAlignFromValue(II->getArgOperand(1)));
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      getAlignFromValue(II->getArgOperand(2)));
auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);
auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);
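
// getMask: for masked load/store intrinsics return the mask operand that is
// already there; for plain loads/stores synthesize an all-true mask with one
// lane per element of the accessed value.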
auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(2);
    case Intrinsic::masked_store:
      return II->getArgOperand(3);

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy)) {
    int ElemCount = VecTy->getElementCount().getFixedValue();
    return HVC.getFullValue(HVC.getBoolTy(ElemCount));
  return HVC.getFullValue(HVC.getBoolTy());
auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(3);
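
// createAdjustedPointer adds a fixed byte offset to Ptr: when the offset is
// a whole multiple of the pointee size it emits a typed GEP, otherwise it
// goes through an i8 pointer and indexes by raw bytes.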
                                            Type *ValTy, int Adjust) const
  auto *PtrTy = cast<PointerType>(Ptr->getType());
  if (!PtrTy->isOpaque()) {
    int ElemSize = HVC.getAllocSizeOf(ElemTy);
    if (Adjust % ElemSize == 0 && Adjust != 0) {
          Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));

  Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
                                  HVC.getConstInt(Adjust));
                                           Type *ValTy, int Alignment) const
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
  Value *Mask = HVC.getConstInt(-Alignment);
  if (HVC.isZero(Mask))

    return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
  return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru);
                               Value *Ptr, int Alignment,
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))

    return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
auto AlignVectors::createAddressGroups() -> bool {
  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    return std::make_pair(nullptr, 0);

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
      auto AI = this->getAddrInfo(I);
      auto F = findBaseAndOffset(*AI);
        AI->Offset = F.second;
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      AddrGroups[GroupInst].push_back(*AI);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });

      G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });

  return !AddrGroups.empty();
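
// createLoadGroups greedily packs the loads of one address group into
// MoveGroups: a load joins the current group only if it is in the same block
// as the group's base, has the same HVX-ness, and the in-block instructions
// it depends on (getUpwardDeps) can be moved above the base as well. Groups
// that end up with a single load are discarded.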
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
           "Base and In should be in the same block");
    assert(Base->comesBefore(In) && "Base should come before In");

    std::deque<Instruction *> WorkQ = {In};
    while (!WorkQ.empty()) {
        if (auto *I = dyn_cast<Instruction>(Op)) {
          if (I->getParent() == Parent && Base->comesBefore(I))

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");

    if (Move.IsHvx != isHvx(Info))

    if (Base->getParent() != Info.Inst->getParent())

          return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());

    DepList Deps = getUpwardDeps(Info.Inst, Base);

    Deps.erase(Info.Inst);
    auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };

    Move.Main.push_back(Info.Inst);

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())

    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);

  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
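
// createStoreGroups builds groups the same way for stores, but scans the
// address group in reverse program order; store groups carry no dependence
// list (see the assert in move()), so membership only requires
// isSafeToMoveBeforeInBB to succeed for the whole group.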
auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
           "Not handling stores with return values");

    if (Move.IsHvx != isHvx(Info))

    if (Base->getParent() != Info.Inst->getParent())

    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))

    Move.Main.push_back(Info.Inst);

  MoveList StoreGroups;

  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())

    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);

  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  assert(!Move.Main.empty() && "Move group should have non-empty Main");

      D->moveBefore(Where);

    assert(Move.Deps.empty());

      M->moveBefore(Where);

  return Move.Main.size() + Move.Deps.size() > 1;
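
// realignGroup rewrites one move group as aligned accesses. In outline: pick
// the member with the largest known alignment and the largest needed
// alignment; if the known alignment already suffices, the aligned address
// and the (constant) residual shift are computed statically, otherwise the
// pointer is masked down to the needed alignment and the dropped low bits
// become a run-time shift amount (AlignVal). The group's bytes are then
// covered by NumSectors aligned "sector" accesses of ScLen bytes each (the
// HVX vector length, or 4/8 bytes for scalar groups), plus one extra sector
// when a shift is required.
//
// Rough illustration (not taken from an actual test): two unaligned
// 128-byte HVX loads at base+1 and base+129 would become three aligned
// sector loads at base, base+128 and base+256, with adjacent sectors
// funnel-shifted by one byte (vralignb) to recover the original data.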
auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  auto getMaxOf = [](auto Range, auto GetValue) {
    return *std::max_element(
        Range.begin(), Range.end(),
        [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);

  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  const AddrInfo &WithMaxAlign =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

  Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back();
  Value *AlignAddr = nullptr;
  Value *AlignVal = nullptr;

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());

    AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
                                     WithMinOffset.ValTy, MinNeeded.value());
    AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());

  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);

  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);
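
  // Load path: emit NumSectors (+1 when a shift is needed) aligned sector
  // loads, combine adjacent sectors with vralignb using AlignVal as the
  // byte shift, then reassemble and mask each original load's value from
  // the aligned span.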
    auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));

    for (int i = 0; i != NumSectors + DoAlign; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      int Start = (i - DoAlign) * ScLen;
      int Width = (1 + DoAlign) * ScLen;
                        VSpan.section(Start, Width).values());
      ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen);

    for (int j = 0; j != NumSectors; ++j) {
      ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val,
                                      ASpan[j + 1].Seg.Val, AlignVal);

    for (ByteSpan::Block &B : VSpan) {
      ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
      for (ByteSpan::Block &S : ASection) {
            HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);

      Type *ValTy = getPayload(B.Seg.Val)->getType();
                                     getPassThrough(B.Seg.Val));
      B.Seg.Val->replaceAllUsesWith(Sel);
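
  // Store path: scatter each stored value and its mask into byte spans
  // aligned to the sector grid, shift both with vlalignb by the same
  // AlignVal, and emit one (masked) sector store per sector, skipping
  // sectors whose value is undef or whose mask is all-zero.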
    ByteSpan ASpanV, ASpanM;

      Type *Ty = Val->getType();
      return Builder.CreateBitCast(Val, VecTy);

    for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
      ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
      Value *AccumM = HVC.getNullValue(SecTy);
      for (ByteSpan::Block &S : VSection) {
        Value *Pay = getPayload(S.Seg.Val);
                                  Pay->getType(), HVC.getByteTy());
                             S.Seg.Start, S.Seg.Size, S.Pos);
                             S.Seg.Start, S.Seg.Size, S.Pos);
      ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
      ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);

    for (int j = 1; j != NumSectors + 2; ++j) {
      ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanV[j - 1].Seg.Val,
                                           ASpanV[j].Seg.Val, AlignVal);
      ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanM[j - 1].Seg.Val,
                                           ASpanM[j].Seg.Val, AlignVal);

    for (int i = 0; i != NumSectors + DoAlign; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      Value *Val = ASpanV[i].Seg.Val;
      if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
        int Start = (i - DoAlign) * ScLen;
        int Width = (1 + DoAlign) * ScLen;
                          VSpan.section(Start, Width).values());
  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();
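
// AlignVectors::run: build the address groups, form load and store move
// groups for each, move every group next to its base, then realign it.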
  if (!createAddressGroups())

  bool Changed = false;
  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {

  for (auto &M : LoadGroups)

  for (auto &M : StoreGroups)

  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);
  if (!HST.useHVXOps())

  bool Changed = AlignVectors(*this).run();
auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {

auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * {

  if (auto *C = dyn_cast<Constant>(Val))
    return C->isZeroValue();

auto HexagonVectorCombine::getIntValue(const Value *Val) const
  if (auto *CI = dyn_cast<ConstantInt>(Val))
    return CI->getValue();

  return isa<UndefValue>(Val);

auto HexagonVectorCombine::getSizeOf(const Value *Val) const -> int {
  return getSizeOf(Val->getType());

auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int {
  return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue();

auto HexagonVectorCombine::getAllocSizeOf(const Type *Ty) const -> int {
  return DL.getTypeAllocSize(const_cast<Type *>(Ty)).getFixedValue();

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();
auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))

auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
                                   int Start, int Length, int Where) const
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);
  assert(0 <= Start && Start + Length <= SrcLen);
  assert(0 <= Where && Where + Length <= DstLen);

  for (int i = 0; i != P2Len; ++i) {
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;

  Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");

  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),

  if (HST.isTypeForHVX(Hi->getType())) {
    int HwLen = HST.getVectorLength();
    assert(VecLen == HwLen && "Expecting an exact HVX type");
                                    ? Intrinsic::hexagon_V6_vlalignb
                                    : Intrinsic::hexagon_V6_vlalignb_128B;
    return createHvxIntrinsic(Builder, V6_vlalignb, Hi->getType(),

    return Builder.CreateBitCast(Trunc, Hi->getType());

    Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
    return vralignb(Builder, Lo, Hi, Sub);
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");

  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    int HwLen = HST.getVectorLength();
    assert(VecLen == HwLen && "Expecting an exact HVX type");
    Intrinsic::ID V6_valignb = HwLen == 64 ? Intrinsic::hexagon_V6_valignb
                                           : Intrinsic::hexagon_V6_valignb_128B;
    return createHvxIntrinsic(Builder, V6_valignb, Lo->getType(),

    return Builder.CreateBitCast(Trunc, Lo->getType());

                                            Intrinsic::hexagon_S2_valignrb);
    return Builder.CreateBitCast(Call, Lo->getType());
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;

  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
    int ElemCount = Ty->getElementCount().getFixedValue();
    SMask.resize(ElemCount * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)

    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
                                                  Work[ThisW][i + 1], SMask);
      Work[OtherW].push_back(Joined);

  SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);
  Value *Total = Work[OtherW].front();
  return Builder.CreateShuffleVector(Total, SMask);
  auto *ValTy = cast<VectorType>(Val->getType());
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = ValTy->getElementCount().getFixedValue();
  if (CurSize == NewSize)

  if (CurSize > NewSize)
    return getElementRange(Builder, Val, Val, 0, NewSize);

  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
  Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
  return Builder.CreateShuffleVector(Val, PadVec, SMask);
  if (FromSTy == ToSTy)

  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

  auto *MaskTy = cast<VectorType>(Mask->getType());
  int FromCount = MaskTy->getElementCount().getFixedValue();
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);
  if (ScalarTy == getBoolTy())

  if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));

  return Builder.CreateTrunc(Bytes, getBoolTy());

  if (ScalarTy == getByteTy())

  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));

  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))

  return Builder.CreateSExt(Val, getByteTy());
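
// createHvxIntrinsic casts the operands to the types the HVX intrinsic
// expects (boolean vectors via V6_pred_typecast, everything else via
// bitcast), emits the call, and casts the result back to RetTy.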
  int HwLen = HST.getVectorLength();

  auto getTypeForIntrin = [&](Type *Ty) -> Type * {
    if (HST.isTypeForHVX(Ty, true)) {
      Type *ElemTy = cast<VectorType>(Ty)->getElementType();
      if (ElemTy == BoolTy)

    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
    if (HST.isTypeForHVX(SrcTy, true)) {
      if (cast<VectorType>(SrcTy)->getElementType() == BoolTy) {
                                ? Intrinsic::hexagon_V6_pred_typecast
                                : Intrinsic::hexagon_V6_pred_typecast_128B;
                                              {DestTy, Val->getType()});
        return Builder.CreateCall(FI, {Val});

    return Builder.CreateBitCast(Val, DestTy);

    IntOps.push_back(getCast(Builder, A, getTypeForIntrin(A->getType())));

  if (CallTy == RetTy)

  assert(HST.isTypeForHVX(CallTy, true));
  if (cast<VectorType>(CallTy)->getElementType() == BoolTy)
    return getCast(Builder, Call, RetTy);
  return Builder.CreateBitCast(Call, RetTy);
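
// calculatePointerDifference returns the difference Ptr0 - Ptr1 in bytes
// when both pointers are GEPs off the same base and the index difference
// folds to a compile-time constant; otherwise it returns no value.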
auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,

        I->eraseFromParent();

#define CallBuilder(B, F)                                                      \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \

    if (auto *I = dyn_cast<Instruction>(V)) {

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())

  int Scale = getAllocSizeOf(Gep0->getSourceElementType());

  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  if (auto *Diff = dyn_cast<ConstantInt>(
    return Diff->getSExtValue() * Scale;

  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();

  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();

  return (Diff0 + Diff1) * Scale;
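
// isSafeToMoveBeforeInBB conservatively checks whether In can be moved to
// the iterator To within its block: PHIs never move, intervening calls must
// be willreturn/nosync, and any memory access between the two points must
// be provably no-alias with In's own location (masked load/store
// intrinsics are handled specially when computing that location).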
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,

    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
      case Intrinsic::masked_store:

  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);

  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))

  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();

  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)

    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
      if (!CB->hasFnAttr(Attribute::NoSync))

    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();

auto HexagonVectorCombine::isSectorTy(Type *Ty) const -> bool {
  if (!isByteVecTy(Ty))

  int Size = getSizeOf(Ty);
  if (HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HST.getVectorLength());
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && Start < Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask);
class HexagonVectorCombineLegacy : public FunctionPass {
  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

    if (skipFunction(F))

    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);

                    "Hexagon Vector Combine", false, false)

  return new HexagonVectorCombineLegacy();