15#include "llvm/IR/IntrinsicsHexagon.h"
26#include <unordered_map>
30#define DEBUG_TYPE "hexagon-isel"
100enum class ColorKind {
None, Red, Black };
106 using MapType = std::map<Node, ColorKind>;
115 const MapType &colors()
const {
119 ColorKind other(ColorKind Color) {
120 if (Color == ColorKind::None)
121 return ColorKind::Red;
122 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
130 std::set<Node> Needed;
132 using NodeSet = std::set<Node>;
133 std::map<Node,NodeSet> Edges;
137 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
140 ColorKind getColor(
Node N) {
141 auto F = Colors.find(
N);
142 return F != Colors.end() ?
F->second : ColorKind::None;
145 std::pair<bool, ColorKind> getUniqueColor(
const NodeSet &Nodes);
152std::pair<bool, ColorKind> Coloring::getUniqueColor(
const NodeSet &Nodes) {
153 auto Color = ColorKind::None;
154 for (Node
N : Nodes) {
155 ColorKind ColorN = getColor(
N);
156 if (ColorN == ColorKind::None)
158 if (Color == ColorKind::None)
160 else if (Color != ColorKind::None && Color != ColorN)
161 return {
false, ColorKind::None };
163 return {
true, Color };
166void Coloring::build() {
168 for (
unsigned P = 0;
P != Order.size(); ++
P) {
172 Node PC = Order[conj(
P)];
178 for (
unsigned I = 0;
I != Order.size(); ++
I) {
179 if (!Needed.count(
I))
185 NodeSet &Is = Edges[
I];
191bool Coloring::color() {
192 SetVector<Node> FirstQ;
193 auto Enqueue = [
this,&FirstQ] (Node
N) {
196 for (
unsigned I = 0;
I != Q.
size(); ++
I) {
197 NodeSet &Ns = Edges[Q[
I]];
202 for (Node
N : Needed)
205 for (Node
N : FirstQ) {
208 NodeSet &Ns = Edges[
N];
209 auto P = getUniqueColor(Ns);
212 Colors[
N] = other(
P.second);
216 for (
auto E : Edges) {
218 if (!Needed.count(conj(
N)) || Colors.count(
N))
220 auto P = getUniqueColor(
E.second);
223 Colors[
N] = other(
P.second);
228 std::vector<Node> WorkQ;
229 for (
auto E : Edges) {
231 if (!Colors.count(
N))
235 for (Node
N : WorkQ) {
236 NodeSet &Ns = Edges[
N];
237 auto P = getUniqueColor(Ns);
239 Colors[
N] = other(
P.second);
245 ColorKind ColorN = other(ColorKind::None);
246 ColorKind ColorC = other(ColorN);
247 NodeSet &Cs = Edges[
C];
249 for (Node M : CopyNs) {
250 ColorKind ColorM = getColor(M);
251 if (ColorM == ColorC) {
264 for (
unsigned I = 0;
I != Order.size(); ++
I)
265 Colors.try_emplace(
I, ColorKind::None);
270#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
271void Coloring::dump()
const {
272 dbgs() <<
"{ Order: {";
273 for (Node
P : Order) {
280 dbgs() <<
" Needed: {";
281 for (Node
N : Needed)
285 dbgs() <<
" Edges: {\n";
286 for (
auto E : Edges) {
287 dbgs() <<
" " <<
E.first <<
" -> {";
288 for (
auto N :
E.second)
294 auto ColorKindToName = [](ColorKind
C) {
296 case ColorKind::None:
300 case ColorKind::Black:
306 dbgs() <<
" Colors: {\n";
307 for (
auto C : Colors)
308 dbgs() <<
" " <<
C.first <<
" -> " << ColorKindToName(
C.second) <<
"\n";
318 using Controls = std::vector<uint8_t>;
319 using ElemType = int;
320 static constexpr ElemType
Ignore = ElemType(-1);
336 unsigned S = Order.size();
340 Table.resize(Order.size());
341 for (RowType &Row : Table)
342 Row.resize(Mult*Log,
None);
345 void getControls(Controls &V,
unsigned StartAt, uint8_t Dir)
const {
346 unsigned Size = Order.size();
348 for (
unsigned I = 0;
I !=
Size; ++
I) {
350 for (
unsigned L = 0;
L != Log; ++
L) {
351 unsigned C = ctl(
I, StartAt+L) == Switch;
362 uint8_t ctl(ElemType Pos,
unsigned Step)
const {
363 return Table[Pos][Step];
365 unsigned size()
const {
368 unsigned steps()
const {
374 std::vector<ElemType> Order;
375 using RowType = std::vector<uint8_t>;
376 std::vector<RowType> Table;
379struct ForwardDeltaNetwork :
public PermNetwork {
382 bool run(Controls &V) {
383 if (!route(Order.data(), Table.data(),
size(), 0))
385 getControls(V, 0, Forward);
390 bool route(ElemType *
P, RowType *
T,
unsigned Size,
unsigned Step);
393struct ReverseDeltaNetwork :
public PermNetwork {
396 bool run(Controls &V) {
397 if (!route(Order.data(), Table.data(),
size(), 0))
404 bool route(ElemType *
P, RowType *
T,
unsigned Size,
unsigned Step);
407struct BenesNetwork :
public PermNetwork {
410 bool run(Controls &
F, Controls &R) {
411 if (!route(Order.data(), Table.data(),
size(), 0))
414 getControls(
F, 0, Forward);
420 bool route(ElemType *
P, RowType *
T,
unsigned Size,
unsigned Step);
424bool ForwardDeltaNetwork::route(ElemType *
P, RowType *
T,
unsigned Size,
426 bool UseUp =
false, UseDown =
false;
433 for (ElemType J = 0; J != Num; ++J) {
443 S = (J < Num/2) ? Switch :
Pass;
446 ElemType
U = (S ==
Pass) ?
I : (
I < Num/2 ?
I+Num/2 :
I-Num/2);
451 if (
T[U][Step] != S &&
T[U][Step] !=
None)
456 for (ElemType J = 0; J != Num; ++J)
457 if (
P[J] !=
Ignore &&
P[J] >= Num/2)
461 if (UseUp && !route(
P,
T,
Size/2, Step+1))
469bool ReverseDeltaNetwork::route(ElemType *
P, RowType *
T,
unsigned Size,
471 unsigned Pets = Log-1 - Step;
472 bool UseUp =
false, UseDown =
false;
477 const Coloring::MapType &
M =
G.colors();
481 ColorKind ColorUp = ColorKind::None;
482 for (ElemType J = 0; J != Num; ++J) {
488 ColorKind
C =
M.at(
I);
489 if (
C == ColorKind::None)
494 bool InpUp =
I < Num/2;
495 if (ColorUp == ColorKind::None)
496 ColorUp = InpUp ?
C :
G.other(
C);
497 if ((
C == ColorUp) != InpUp) {
507 S = (J < Num/2) ? Switch :
Pass;
515 for (ElemType J = 0,
E =
Size / 2; J !=
E; ++J) {
517 ElemType PC =
P[J+
Size/2];
520 if (
T[J][Pets] == Switch)
522 if (
T[J+
Size/2][Pets] == Switch)
528 for (ElemType J = 0; J != Num; ++J)
529 if (
P[J] !=
Ignore &&
P[J] >= Num/2)
533 if (UseUp && !route(
P,
T,
Size/2, Step+1))
541bool BenesNetwork::route(ElemType *
P, RowType *
T,
unsigned Size,
544 const Coloring::MapType &
M =
G.colors();
549 unsigned Pets = 2*Log-1 - Step;
550 bool UseUp =
false, UseDown =
false;
555 ColorKind ColorUp = ColorKind::None;
556 for (ElemType J = 0; J != Num; ++J) {
560 ColorKind
C =
M.at(
I);
561 if (
C == ColorKind::None)
563 if (ColorUp == ColorKind::None) {
564 ColorUp = (
I < Num / 2) ? ColorKind::Red : ColorKind::Black;
566 unsigned CI = (
I < Num/2) ?
I+Num/2 :
I-Num/2;
579 T[J][Pets] = (J < Num/2) ? Switch :
Pass;
586 for (ElemType J = 0; J != Num/2; ++J) {
588 ElemType PC =
P[J+Num/2];
591 if (
T[J][Pets] == Switch)
593 if (
T[J+Num/2][Pets] == Switch)
599 for (ElemType J = 0; J != Num; ++J)
600 if (
P[J] !=
Ignore &&
P[J] >= Num/2)
604 if (UseUp && !route(
P,
T,
Size/2, Step+1))
619 bool isValue()
const {
return OpV.
getNode() !=
nullptr; }
621 bool isUndef()
const {
return OpN & Undef; }
622 static OpRef res(
int N) {
return OpRef(Whole | (
N & Index)); }
625 static OpRef
lo(
const OpRef &R) {
627 return OpRef(
R.OpN & (Undef | Index | LoHalf));
629 static OpRef
hi(
const OpRef &R) {
631 return OpRef(
R.OpN & (Undef | Index | HiHalf));
633 static OpRef undef(MVT Ty) {
return OpRef(Undef | Ty.
SimpleTy); }
649 Whole = LoHalf | HiHalf,
656 void print(raw_ostream &OS,
const SelectionDAG &
G)
const;
659 OpRef(
unsigned N) : OpN(
N) {}
663 NodeTemplate() =
default;
666 std::vector<OpRef>
Ops;
672 ResultStack(SDNode *Inp)
676 unsigned push(
const NodeTemplate &Res) {
678 return List.size()-1;
680 unsigned push(
unsigned Opc, MVT Ty, std::vector<OpRef> &&
Ops) {
687 bool empty()
const {
return List.empty(); }
688 unsigned size()
const {
return List.size(); }
689 unsigned top()
const {
return size()-1; }
690 const NodeTemplate &operator[](
unsigned I)
const {
return List[
I]; }
691 unsigned reset(
unsigned NewTop) {
692 List.resize(NewTop+1);
696 using BaseType = std::vector<NodeTemplate>;
697 BaseType::iterator
begin() {
return List.begin(); }
698 BaseType::iterator
end() {
return List.end(); }
699 BaseType::const_iterator
begin()
const {
return List.begin(); }
700 BaseType::const_iterator
end()
const {
return List.end(); }
705 void print(raw_ostream &OS,
const SelectionDAG &
G)
const;
709#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
710void OpRef::print(raw_ostream &OS,
const SelectionDAG &
G)
const {
723 if ((OpN & Whole) != Whole) {
724 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
733void NodeTemplate::print(raw_ostream &OS,
const SelectionDAG &
G)
const {
734 const TargetInstrInfo &
TII = *
G.getSubtarget().getInstrInfo();
735 OS <<
format(
"%8s", EVT(Ty).getEVTString().
c_str()) <<
" "
738 for (
const auto &R :
Ops) {
747void ResultStack::print(raw_ostream &OS,
const SelectionDAG &
G)
const {
748 OS <<
"Input node:\n";
752 OS <<
"Result templates:\n";
753 for (
unsigned I = 0,
E =
List.size();
I !=
E; ++
I) {
754 OS <<
'[' <<
I <<
"] ";
763 ShuffleMask(ArrayRef<int> M) : Mask(
M) {
767 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc,
M);
768 MaxSrc = (MaxSrc == -1) ?
M : std::max(MaxSrc, M);
773 int MinSrc = -1, MaxSrc = -1;
775 ShuffleMask
lo()
const {
776 size_t H =
Mask.size()/2;
777 return ShuffleMask(
Mask.take_front(
H));
779 ShuffleMask
hi()
const {
780 size_t H =
Mask.size()/2;
781 return ShuffleMask(
Mask.take_back(
H));
784 void print(raw_ostream &OS)
const {
785 OS <<
"MinSrc:" << MinSrc <<
", MaxSrc:" << MaxSrc <<
" {";
793raw_ostream &
operator<<(raw_ostream &OS,
const ShuffleMask &SM) {
823 for (
int i = 0; i != Len; ++i) {
843 for (
int i = 0; i != Len; ++i) {
854 auto Odd =
static_cast<int>(TakeOdd);
855 for (
int i = 0, e = Len / (2 *
Size); i != e; ++i) {
856 for (
int b = 0; b !=
static_cast<int>(
Size); ++b) {
858 Vd[i *
Size + b] = Vv[(2 * i + Odd) *
Size + b];
859 Vd[i *
Size + b + Len / 2] = Vu[(2 * i + Odd) *
Size + b];
869 auto Odd =
static_cast<int>(TakeOdd);
870 for (
int i = 0, e = Len / (2 *
Size); i != e; ++i) {
871 for (
int b = 0; b !=
static_cast<int>(
Size); ++b) {
872 Vd[(2 * i + 0) *
Size + b] = Vv[(2 * i + Odd) *
Size + b];
873 Vd[(2 * i + 1) *
Size + b] = Vu[(2 * i + Odd) *
Size + b];
888 for (
int i = 0, e = Len / 4; i != e; ++i) {
889 Vd[0 * (Len / 4) + i] = Vv[4 * i + 0];
890 Vd[1 * (Len / 4) + i] = Vv[4 * i + 2];
891 Vd[2 * (Len / 4) + i] = Vu[4 * i + 0];
892 Vd[3 * (Len / 4) + i] = Vu[4 * i + 2];
897template <
typename ShuffFunc,
typename... OptArgs>
900 std::iota(Vu.begin(), Vu.end(),
Length);
902 return S(Vu, Vv,
args...);
930 assert(ElemTy != MVT::i1 &&
"Use getBoolVT for predicates");
931 unsigned NumElems =
HwLen / (ElemTy.getSizeInBits() / 8);
936 assert(ElemTy != MVT::i1);
937 unsigned NumElems = (2 *
HwLen) / (ElemTy.getSizeInBits() / 8);
946 void selectExtractSubvector(
SDNode *
N);
955 static std::optional<int> rotationDistance(ShuffleMask SM,
unsigned WrapAt);
958 void select(
SDNode *ISelN);
959 void materialize(
const ResultStack &
Results);
969 OpRef concats(OpRef Va, OpRef Vb, ResultStack &
Results);
970 OpRef funnels(OpRef Va, OpRef Vb,
int Amount, ResultStack &
Results);
972 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results,
974 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results,
981 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &
Results);
982 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results);
983 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &
Results);
984 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results);
986 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &
Results);
987 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &
Results);
988 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &
Results);
989 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &
Results);
991 bool selectVectorConstants(
SDNode *
N);
999 unsigned VecLen = Mask.size();
1001 for (
unsigned I = 0;
I != VecLen; ++
I) {
1004 MaskL[
I] = MaskR[
I] = -1;
1005 }
else if (
unsigned(M) < VecLen) {
1010 MaskR[
I] = M-VecLen;
1017 assert(
A.size() > 0 &&
A.size() >= MaxLen);
1020 for (
unsigned I = 1;
I != MaxLen; ++
I) {
1021 if (
A[
I] -
E != Inc)
1025 return {
F, MaxLen };
1029 for (
int Idx : Mask)
1036 for (
int I = 0,
E = Mask.size();
I !=
E; ++
I) {
1038 if (M >= 0 && M !=
I)
1045 int L = Mask.size();
1048 return llvm::all_of(Mask.drop_front(L / 2), [](
int M) { return M < 0; });
1055 if (SM.MaxSrc == -1)
1058 unsigned Shift =
Log2_32(SegLen);
1061 for (
int M : SM.Mask) {
1063 Segs.
set(M >> Shift);
1081 unsigned MaskLen = SM.Mask.
size();
1082 assert(MaskLen % SegLen == 0);
1085 for (
int S = 0,
E = Map.size(); S !=
E; ++S) {
1087 for (
int I = 0;
I !=
static_cast<int>(SegLen); ++
I) {
1088 int M = SM.Mask[S*SegLen +
I];
1091 unsigned G = M / SegLen;
1094 }
else if (Idx !=
G) {
1108 for (
int I = OutSegMap.
size() - 1;
I >= 0; --
I) {
1109 unsigned S = OutSegMap[
I];
1110 assert(S != ~0u &&
"Unexpected undef");
1111 assert(S != ~1u &&
"Unexpected multi");
1112 if (InvMap.
size() <= S)
1117 unsigned Shift =
Log2_32(SegLen);
1118 for (
int I = 0,
E = Mask.size();
I !=
E; ++
I) {
1121 int OutIdx = InvMap[M >> Shift];
1122 M = (M & (SegLen-1)) + SegLen*OutIdx;
1128bool HvxSelector::selectVectorConstants(SDNode *
N) {
1134 SetVector<SDNode*> WorkQ;
1139 for (
unsigned i = 0; i != WorkQ.
size(); ++i) {
1140 SDNode *
W = WorkQ[i];
1143 for (
unsigned j = 0, f =
W->getNumOperands(); j != f; ++j)
1144 WorkQ.
insert(
W->getOperand(j).getNode());
1147 for (SDNode *L : Nodes)
1150 return !Nodes.empty();
1153void HvxSelector::materialize(
const ResultStack &
Results) {
1155 dbgs() <<
"Materializing\n";
1160 const SDLoc &dl(
Results.InpNode);
1161 std::vector<SDValue> Output;
1165 std::vector<SDValue>
Ops;
1166 for (
const OpRef &R :
Node.Ops) {
1169 Ops.push_back(
R.OpV);
1172 if (
R.OpN & OpRef::Undef) {
1174 Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1178 unsigned Part =
R.OpN & OpRef::Whole;
1179 int Idx =
SignExtend32(
R.OpN & OpRef::Index, OpRef::IndexBits);
1182 assert(Idx >= 0 &&
unsigned(Idx) < Output.size());
1184 MVT OpTy =
Op.getValueType().getSimpleVT();
1185 if (Part != OpRef::Whole) {
1186 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1189 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1191 Op = DAG.getTargetExtractSubreg(
Sub, dl, HalfTy,
Op);
1197 SDNode *ResN = (
Node.Opc == TargetOpcode::COPY)
1198 ?
Ops.front().getNode()
1200 Output.push_back(
SDValue(ResN, 0));
1203 SDNode *OutN = Output.back().getNode();
1204 SDNode *InpN =
Results.InpNode;
1206 dbgs() <<
"Generated node:\n";
1210 ISel.ReplaceNode(InpN, OutN);
1211 selectVectorConstants(OutN);
1212 DAG.RemoveDeadNodes();
1215OpRef HvxSelector::concats(OpRef
Lo, OpRef
Hi, ResultStack &
Results) {
1217 const SDLoc &dl(
Results.InpNode);
1218 Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1219 getConst32(Hexagon::HvxWRRegClassID, dl),
1220 Lo, getConst32(Hexagon::vsub_lo, dl),
1221 Hi, getConst32(Hexagon::vsub_hi, dl),
1223 return OpRef::res(
Results.top());
1226OpRef HvxSelector::funnels(OpRef Va, OpRef Vb,
int Amount,
1231 auto VecLen =
static_cast<int>(HwLen);
1235 if (Amount == VecLen)
1238 MVT Ty = getSingleVT(MVT::i8);
1239 const SDLoc &dl(
Results.InpNode);
1243 if (Amount > VecLen) {
1249 SDValue A = getConst32(Amount, dl);
1250 Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va,
A});
1251 }
else if (
isUInt<3>(VecLen - Amount)) {
1252 SDValue A = getConst32(VecLen - Amount, dl);
1253 Results.push(Hexagon::V6_vlalignbi, Ty, {Vb, Va,
A});
1255 SDValue A = getConst32(Amount, dl);
1256 Results.push(Hexagon::A2_tfrsi, Ty, {
A});
1257 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)});
1259 return OpRef::res(
Results.top());
1265OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1269 if (!Va.isValid() || !Vb.isValid())
1270 return OpRef::fail();
1282 MVT Ty = getSingleVT(MVT::i8);
1283 MVT
PairTy = getPairVT(MVT::i8);
1284 OpRef Inp[2] = {Va, Vb};
1285 unsigned VecLen = SM.Mask.
size();
1287 auto valign = [
this](OpRef
Lo, OpRef
Hi,
unsigned Amt, MVT Ty,
1291 const SDLoc &dl(
Results.InpNode);
1294 SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1295 unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1297 return OpRef::res(
Results.top());
1299 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1300 OpRef
A = OpRef::res(
Results.top());
1302 return OpRef::res(
Results.top());
1306 unsigned SegLen = HwLen / 2;
1312 unsigned SegCount = SegList.
size();
1315 if (SegList.
empty())
1316 return OpRef::undef(Ty);
1334 unsigned Seg0 = ~0
u, Seg1 = ~0
u;
1335 for (
unsigned X : SegMap) {
1340 else if (Seg1 != ~0u)
1342 if (
X == ~1u ||
X != Seg0)
1346 if (SegCount == 1) {
1347 unsigned SrcOp = SegList[0] / 2;
1348 for (
int I = 0;
I !=
static_cast<int>(VecLen); ++
I) {
1359 if (SegCount == 2) {
1366 if (Seg0 == ~1u || Seg1 == ~1u) {
1370 }
else if (Seg0 == ~1u) {
1371 Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1374 Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1377 assert(Seg0 != ~1u && Seg1 != ~1u);
1379 assert(Seg0 != Seg1 &&
"Expecting different segments");
1380 const SDLoc &dl(
Results.InpNode);
1381 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1382 OpRef HL = OpRef::res(
Results.top());
1386 if (Seg0 / 2 == Seg1 / 2) {
1391 Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1392 Va = OpRef::res(
Results.top());
1395 }
else if (Seg0 % 2 == Seg1 % 2) {
1399 auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va)
1400 : std::make_pair(Va, Vb);
1401 Results.push(Hexagon::V6_vshuffvdd,
PairTy, {Vs.first, Vs.second, HL});
1402 OpRef
P = OpRef::res(
Results.top());
1403 Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(
P) : OpRef::hi(
P);
1407 if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1411 Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1412 OpRef Qt = OpRef::res(
Results.top());
1413 auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb)
1414 : std::make_pair(Vb, Va);
1415 Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1416 Va = OpRef::res(
Results.top());
1422 assert(Seg0 == 1 || Seg0 == 3);
1429 ShuffleMask SMH(MaskH);
1430 assert(SMH.Mask.size() == VecLen);
1433 if (SMH.MaxSrc - SMH.MinSrc >=
static_cast<int>(HwLen)) {
1437 ShuffleMask SW(Swapped);
1438 if (SW.MaxSrc - SW.MinSrc <
static_cast<int>(HwLen)) {
1439 MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1443 ShuffleMask SMA(MaskA);
1444 assert(SMA.Mask.size() == VecLen);
1446 if (SMA.MaxSrc - SMA.MinSrc <
static_cast<int>(HwLen)) {
1447 int ShiftR = SMA.MinSrc;
1448 if (ShiftR >=
static_cast<int>(HwLen)) {
1450 Vb = OpRef::undef(Ty);
1453 OpRef RetVal = valign(Va, Vb, ShiftR, Ty,
Results);
1455 for (
int I = 0;
I !=
static_cast<int>(VecLen); ++
I) {
1456 int M = SMA.Mask[
I];
1472 BitVector Picked(HwLen);
1473 SmallVector<uint8_t,128> MuxBytes(HwLen);
1475 for (
int I = 0;
I !=
static_cast<int>(VecLen); ++
I) {
1479 if (M >=
static_cast<int>(HwLen))
1490 return vmuxs(MuxBytes, Va, Vb,
Results);
1492 return OpRef::fail();
1498OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1502 if (SegList.empty())
1503 return OpRef::undef(getPairVT(MVT::i8));
1507 unsigned SegCount = SegList.size();
1509 return OpRef::fail();
1511 MVT HalfTy = getSingleVT(MVT::i8);
1513 OpRef Inp[2] = { Va, Vb };
1514 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1519 for (
int I = 0,
E = SegList.size();
I !=
E; ++
I) {
1520 unsigned S = SegList[
I];
1521 OpRef
Op = Inp[S / 2];
1522 Out[
I] = (S & 1) ? OpRef::hi(
Op) : OpRef::lo(
Op);
1532 return concats(Out[0], Out[1],
Results);
1535OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1538 MVT ByteTy = getSingleVT(MVT::i8);
1540 const SDLoc &dl(
Results.InpNode);
1541 SDValue B = getVectorConstant(Bytes, dl);
1542 Results.push(Hexagon::V6_vd0, ByteTy, {});
1543 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(
B), OpRef::res(-1)});
1544 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1545 return OpRef::res(
Results.top());
1548OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1551 size_t S = Bytes.
size() / 2;
1557OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
1559 unsigned VecLen = SM.Mask.
size();
1562 assert(
all_of(SM.Mask, [
this](
int M) { return M == -1 || M < int(HwLen); }));
1567 return OpRef::undef(getSingleVT(MVT::i8));
1570 if (
auto Dist = rotationDistance(SM, VecLen)) {
1571 OpRef Rotate = funnels(Va, Va, *Dist,
Results);
1572 if (Rotate.isValid())
1575 unsigned HalfLen = HwLen / 2;
1580 std::pair<int, unsigned> Strip1 =
findStrip(SM.Mask, 1, HalfLen);
1581 if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1582 std::pair<int, unsigned> Strip2 =
1584 if (Strip1 == Strip2) {
1585 const SDLoc &dl(
Results.InpNode);
1586 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1587 Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1588 {Va, Va, OpRef::res(
Results.top())});
1589 OpRef S = OpRef::res(
Results.top());
1590 return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::
hi(S);
1594 OpRef
P = perfect(SM, Va,
Results);
1597 return butterfly(SM, Va,
Results);
1600OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1604 return OpRef::undef(getSingleVT(MVT::i8));
1606 OpRef
C = contracting(SM, Va, Vb,
Results);
1610 int VecLen = SM.Mask.
size();
1612 OpRef
P = packs(SM, Va, Vb,
Results, PackedMask);
1614 return shuffs1(ShuffleMask(PackedMask),
P,
Results);
1622 OpRef
L = shuffs1(ShuffleMask(MaskL), Va,
Results);
1623 OpRef
R = shuffs1(ShuffleMask(MaskR), Vb,
Results);
1624 if (!
L.isValid() || !
R.isValid())
1625 return OpRef::fail();
1627 SmallVector<uint8_t, 128> Bytes(VecLen);
1628 for (
int I = 0;
I != VecLen; ++
I) {
1632 return vmuxs(Bytes, L, R,
Results);
1635OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
1637 int VecLen = SM.Mask.
size();
1642 return OpRef::undef(getPairVT(MVT::i8));
1645 OpRef
P = packs(SM, OpRef::lo(Va), OpRef::hi(Va),
Results, PackedMask);
1647 ShuffleMask PM(PackedMask);
1654 if (
L.isValid() &&
H.isValid())
1664 OpRef
R = perfect(SM, Va,
Results);
1670 OpRef
L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va),
Results);
1671 OpRef
H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va),
Results);
1672 if (
L.isValid() &&
H.isValid())
1675 return OpRef::fail();
1678OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1682 return OpRef::undef(getPairVT(MVT::i8));
1684 int VecLen = SM.Mask.
size();
1686 OpRef
P = packp(SM, Va, Vb,
Results, PackedMask);
1688 return shuffp1(ShuffleMask(PackedMask),
P,
Results);
1693 OpRef
L = shuffp1(ShuffleMask(MaskL), Va,
Results);
1694 OpRef
R = shuffp1(ShuffleMask(MaskR), Vb,
Results);
1695 if (!
L.isValid() || !
R.isValid())
1696 return OpRef::fail();
1700 for (
int I = 0;
I != VecLen; ++
I) {
1704 return vmuxp(Bytes, L, R,
Results);
1708 struct Deleter :
public SelectionDAG::DAGNodeDeletedListener {
1709 template <
typename T>
1710 Deleter(SelectionDAG &
D,
T &
C)
1711 : SelectionDAG::DAGNodeDeletedListener(
D, [&
C] (SDNode *
N, SDNode *
E) {
1716 template <
typename T>
1717 struct NullifyingVector :
public T {
1718 DenseMap<SDNode*, SDNode**> Refs;
1719 NullifyingVector(
T &&V) :
T(
V) {
1720 for (
unsigned i = 0, e = T::size(); i !=
e; ++i) {
1721 SDNode *&
N = T::operator[](i);
1727 if (
F != Refs.
end())
1728 *
F->second =
nullptr;
1733void HvxSelector::select(SDNode *ISelN) {
1750 DAG.RemoveDeadNodes();
1752 SetVector<SDNode *> SubNodes;
1761 if (SubNodes.
empty()) {
1762 ISel.ReplaceNode(ISelN, N0);
1769 SetVector<SDNode*> Dom, NonDom;
1772 auto IsDomRec = [&Dom, &NonDom] (SDNode *
T,
auto Rec) ->
bool {
1775 if (
T->use_empty() || NonDom.
count(
T))
1777 for (SDNode *U :
T->users()) {
1789 auto IsDom = [&IsDomRec] (SDNode *
T) {
return IsDomRec(
T, IsDomRec); };
1792 for (
unsigned I = 0;
I != SubNodes.
size(); ++
I) {
1794 SDNode *
O =
Op.getNode();
1801 SetVector<SDNode*> TmpQ;
1803 std::map<SDNode *, unsigned> OpCount;
1804 for (SDNode *
T : Dom) {
1805 unsigned NumDomOps =
llvm::count_if(
T->ops(), [&Dom](
const SDUse &U) {
1806 return Dom.count(U.getNode());
1809 OpCount.insert({
T, NumDomOps});
1814 for (
unsigned I = 0;
I != TmpQ.
size(); ++
I) {
1815 SDNode *S = TmpQ[
I];
1816 for (SDNode *U : S->
users()) {
1819 auto F = OpCount.find(U);
1821 if (
F->second > 0 && !--
F->second)
1827 ISel.ReplaceNode(ISelN, N0);
1830 NullifyingVector<
decltype(TmpQ)::vector_type>
Queue(TmpQ.
takeVector());
1832 Deleter DUQ(DAG, Queue);
1833 for (SDNode *S :
reverse(Queue)) {
1841bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask,
const SDLoc &dl,
1846 assert(ElemTy == MVT::i8);
1847 unsigned VecLen =
Mask.size();
1848 bool HavePairs = (2*HwLen == VecLen);
1849 MVT SingleTy = getSingleVT(MVT::i8);
1865 LLVMContext &Ctx = *DAG.getContext();
1866 MVT LegalTy =
Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1867 for (
int I : Mask) {
1869 Ops.push_back(ISel.selectUndef(dl, LegalTy));
1882 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1884 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1888 SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1896 if (2*HwLen == VecLen) {
1897 SDValue B0 = DAG.getBuildVector(SingleTy, dl, {
Ops.data(), HwLen});
1899 SDValue B1 = DAG.getBuildVector(SingleTy, dl, {
Ops.data()+HwLen, HwLen});
1907 SDValue BV = DAG.getBuildVector(ResTy, dl,
Ops);
1908 LV =
Lower.LowerOperation(BV, DAG);
1915 DAG.RemoveDeadNodes();
1922 unsigned Impossible = ~(1u << Width) + 1;
1923 for (
unsigned I = 0, E = Bs.
size();
I != E; ++
I) {
1927 if (~Impossible == 0)
1929 for (
unsigned Log = 0; Log != Width; ++Log) {
1930 if (Impossible & (1u << Log))
1934 Impossible |= (1u << Log);
1942 for (
unsigned BitIdx = 0; BitIdx != Width; ++BitIdx) {
1944 for (
int i = 0, e = SM.Mask.
size(); i != e; ++i) {
1947 BitValues[i] = 0xff;
1949 BitValues[i] = (M & (1u << BitIdx)) != 0;
1951 Worklist[BitIdx] = possibilities(BitValues, Width);
1993 for (
unsigned I = 0, E = Sorted.
size();
I != E;) {
1994 unsigned P = Sorted[
I],
Count = 1;
1995 while (++
I != E &&
P == Sorted[
I])
2013 for (
unsigned I = 0;
I != Width; ++
I) {
2021 for (
unsigned J =
I + 1; J != Width; ++J) {
2035 assert(OrAll == (1u << Width) -1);
2043 std::optional<int> Dist;
2044 for (
int I = 0, E = SM.Mask.
size();
I != E; ++
I) {
2049 if ((
I + *Dist) %
static_cast<int>(WrapAt) != M)
2050 return std::nullopt;
2057 Dist = *Dist + WrapAt;
2063OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
2066 if (!Va.isValid() || !Vb.isValid())
2067 return OpRef::fail();
2078 int VecLen = SM.Mask.
size();
2081 if (
auto Dist = rotationDistance(SM, 2 * VecLen)) {
2082 OpRef Funnel = funnels(Va, Vb, *Dist,
Results);
2083 if (Funnel.isValid())
2087 MVT SingleTy = getSingleVT(MVT::i8);
2088 MVT
PairTy = getPairVT(MVT::i8);
2090 auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) ->
bool {
2091 return Mask1 == Mask2;
2094 using PackConfig = std::pair<unsigned, bool>;
2095 PackConfig Packs[] = {
2103 unsigned Opcodes[] = {
2104 Hexagon::V6_vpackeb,
2105 Hexagon::V6_vpackob,
2106 Hexagon::V6_vpackeh,
2107 Hexagon::V6_vpackoh,
2109 for (
int i = 0, e = std::size(Opcodes); i !=
e; ++i) {
2110 auto [
Size, Odd] = Packs[i];
2112 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2113 return OpRef::res(
Results.top());
2119 unsigned Opcodes[] = {
2120 Hexagon::V6_vshuffeb,
2121 Hexagon::V6_vshuffob,
2122 Hexagon::V6_vshufeh,
2123 Hexagon::V6_vshufoh,
2125 for (
int i = 0, e = std::size(Opcodes); i !=
e; ++i) {
2126 auto [
Size, Odd] = Packs[i];
2128 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2129 return OpRef::res(
Results.top());
2138 unsigned Opcodes[] = {
2139 Hexagon::V6_vpackeb,
2140 Hexagon::V6_vpackob,
2141 Hexagon::V6_vpackeh,
2142 Hexagon::V6_vpackoh,
2144 const SDLoc &dl(
Results.InpNode);
2146 for (
int i = 0, e = std::size(Opcodes); i !=
e; ++i) {
2147 auto [
Size, Odd] = Packs[i];
2149 Results.push(Hexagon::A2_tfrsi, MVT::i32,
2150 {getSignedConst32(-2 *
Size, dl)});
2151 Results.push(Hexagon::V6_vdealvdd,
PairTy, {Vb, Va, OpRef::res(-1)});
2153 Results.push(Opcodes[i], SingleTy,
2154 {OpRef::hi(vdeal), OpRef::lo(vdeal)});
2155 return OpRef::res(
Results.top());
2161 Results.push(Hexagon::V6_vdealb4w, SingleTy, {Vb, Va});
2162 return OpRef::res(
Results.top());
2165 return OpRef::fail();
2168OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
2181 int VecLen = SM.Mask.
size();
2182 assert(2*HwLen ==
unsigned(VecLen) &&
"Expecting vector-pair type");
2184 std::pair<int,unsigned> Strip =
findStrip(SM.Mask, 1, VecLen);
2191 if (Strip.first != 0)
2192 return OpRef::fail();
2195 if (Strip.second != 1 && Strip.second != 2)
2196 return OpRef::fail();
2199 int L = Strip.second;
2202 for (
int I = 2*L;
I <
N;
I += 2*
L) {
2204 if (S.second !=
unsigned(L))
2205 return OpRef::fail();
2207 return OpRef::fail();
2210 for (
int I = L;
I <
N;
I += 2*
L) {
2212 if (S.first != -1 || S.second !=
unsigned(L))
2213 return OpRef::fail();
2216 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
2217 : Hexagon::V6_vunpackuh;
2219 return OpRef::res(
Results.top());
2222OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
2231 int VecLen = SM.Mask.
size();
2233 unsigned LogLen =
Log2_32(VecLen);
2234 unsigned HwLog =
Log2_32(HwLen);
2237 assert(LogLen == HwLog || LogLen == HwLog + 1);
2238 bool HavePairs = LogLen == HwLog + 1;
2240 SmallVector<unsigned, 8> Perm(LogLen);
2324 bool InvertedPair =
false;
2325 if (HavePairs && SM.Mask[0] >=
int(HwLen)) {
2326 for (
int i = 0, e = SM.Mask.
size(); i != e; ++i) {
2328 MaskStorage[i] =
M >= int(HwLen) ?
M - HwLen :
M + HwLen;
2330 InvertedPair =
true;
2331 SM = ShuffleMask(MaskStorage);
2334 auto Comps = getPerfectCompletions(SM, LogLen);
2336 return OpRef::fail();
2338 auto Pick = completeToPerfect(Comps, LogLen);
2339 for (
unsigned I = 0;
I != LogLen; ++
I)
2367 using CycleType = SmallVector<unsigned, 8>;
2368 std::set<CycleType> Cycles;
2369 std::set<unsigned>
All;
2371 for (
unsigned I : Perm)
2376 auto canonicalize = [LogLen](
const CycleType &
C) -> CycleType {
2377 unsigned LogPos,
N =
C.size();
2378 for (LogPos = 0; LogPos !=
N; ++LogPos)
2379 if (
C[LogPos] == LogLen - 1)
2384 CycleType NewC(
C.begin() + LogPos,
C.end());
2385 NewC.append(
C.begin(),
C.begin() + LogPos);
2389 auto pfs = [](
const std::set<CycleType> &Cs,
unsigned Len) {
2394 const CycleType &
C = *Cs.begin();
2395 if (
C[0] != Len - 1)
2397 int D =
Len -
C.size();
2398 if (
D != 0 &&
D != 1)
2401 bool IsDeal =
true, IsShuff =
true;
2402 for (
unsigned I = 1;
I !=
Len -
D; ++
I) {
2403 if (
C[
I] != Len - 1 -
I)
2405 if (
C[
I] !=
I - (1 -
D))
2409 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2410 static unsigned Deals[] = {Hexagon::V6_vdealb, Hexagon::V6_vdealh};
2411 static unsigned Shufs[] = {Hexagon::V6_vshuffb, Hexagon::V6_vshuffh};
2412 return IsDeal ? Deals[
D] : (IsShuff ? Shufs[
D] : 0);
2415 while (!
All.empty()) {
2416 unsigned A = *
All.begin();
2420 for (
unsigned B = Perm[
A];
B !=
A;
B = Perm[
B]) {
2426 Cycles.insert(canonicalize(
C));
2429 MVT SingleTy = getSingleVT(MVT::i8);
2430 MVT
PairTy = getPairVT(MVT::i8);
2433 if (
unsigned(VecLen) == HwLen) {
2434 if (
unsigned SingleOpc = pfs(Cycles, LogLen)) {
2435 Results.push(SingleOpc, SingleTy, {Va});
2436 return OpRef::res(
Results.top());
2446 SmallVector<unsigned, 8> SwapElems;
2459 for (
const CycleType &
C : Cycles) {
2461 unsigned First = (
C[0] == LogLen - 1) ? 1 : 0;
2469 const SDLoc &dl(
Results.InpNode);
2470 OpRef Arg = HavePairs ? Va : concats(Va, OpRef::undef(SingleTy),
Results);
2472 Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg),
Results);
2474 for (
unsigned I = 0,
E = SwapElems.
size();
I !=
E;) {
2475 bool IsInc =
I ==
E - 1 || SwapElems[
I] < SwapElems[
I + 1];
2476 unsigned S = (1u << SwapElems[
I]);
2478 while (++
I <
E - 1 && IsInc == (SwapElems[
I] < SwapElems[
I + 1]))
2479 S |= 1u << SwapElems[
I];
2482 S |= 1u << SwapElems[
I];
2487 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2488 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2490 Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};
2492 Arg = OpRef::res(
Results.top());
2495 return HavePairs ? Arg : OpRef::lo(Arg);
2498OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &
Results) {
2510 MVT ResTy = getSingleVT(MVT::i8);
2511 PermNetwork::Controls
FC, RC;
2512 const SDLoc &dl(
Results.InpNode);
2513 int VecLen = SM.Mask.
size();
2515 for (
int M : SM.Mask) {
2516 if (M != -1 && M >= VecLen)
2517 return OpRef::fail();
2521 ForwardDeltaNetwork FN(SM.Mask);
2523 SDValue Ctl = getVectorConstant(FC, dl);
2524 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2525 return OpRef::res(
Results.top());
2529 ReverseDeltaNetwork
RN(SM.Mask);
2531 SDValue Ctl = getVectorConstant(RC, dl);
2532 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2533 return OpRef::res(
Results.top());
2537 BenesNetwork BN(SM.Mask);
2538 if (BN.run(FC, RC)) {
2539 SDValue CtlF = getVectorConstant(FC, dl);
2540 SDValue CtlR = getVectorConstant(RC, dl);
2541 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2542 Results.push(Hexagon::V6_vrdelta, ResTy,
2543 {OpRef::res(-1), OpRef(CtlR)});
2544 return OpRef::res(
Results.top());
2547 return OpRef::fail();
2550SDValue HvxSelector::getConst32(
unsigned Val,
const SDLoc &dl) {
2551 return DAG.getTargetConstant(Val, dl, MVT::i32);
2554SDValue HvxSelector::getSignedConst32(
int Val,
const SDLoc &dl) {
2555 return DAG.getSignedTargetConstant(Val, dl, MVT::i32);
2558SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t>
Data,
2561 for (uint8_t
C :
Data)
2562 Elems.
push_back(DAG.getConstant(
C, dl, MVT::i8));
2564 SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2566 DAG.RemoveDeadNode(BV.
getNode());
2572 MVT ResTy =
N->getValueType(0).getSimpleVT();
2573 unsigned Idx =
N->getConstantOperandVal(1);
2579 assert(Idx == 0 || Idx == ResLen);
2581 unsigned SubReg = Idx == 0 ? Hexagon::vsub_lo : Hexagon::vsub_hi;
2584 ISel.ReplaceNode(
N, Ext.getNode());
2589 dbgs() <<
"Starting " << __func__ <<
" on node:\n";
2592 MVT ResTy =
N->getValueType(0).getSimpleVT();
2594 assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2597 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2599 for (
int &Idx : Mask)
2600 if (Idx != -1 && Idx < 0)
2603 unsigned VecLen = Mask.size();
2604 bool HavePairs = (2*
HwLen == VecLen);
2605 assert(ResTy.getSizeInBits() / 8 == VecLen);
2610 bool UseLeft =
false, UseRight =
false;
2611 for (
unsigned I = 0;
I != VecLen; ++
I) {
2614 unsigned Idx = Mask[
I];
2623 dbgs() <<
"VecLen=" << VecLen <<
" HwLen=" <<
HwLen <<
" UseLeft="
2624 << UseLeft <<
" UseRight=" << UseRight <<
" HavePairs="
2625 << HavePairs <<
'\n';
2628 if (!UseLeft && !UseRight) {
2638 OpRef Va = OpRef::undef(ResTy);
2639 OpRef Vb = OpRef::undef(ResTy);
2642 Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2643 Va = OpRef::OpRef::res(
Results.top());
2646 Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2647 Vb = OpRef::res(
Results.top());
2650 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb,
Results)
2651 : shuffp2(ShuffleMask(Mask), Va, Vb,
Results);
2653 bool Done = Res.isValid();
2656 Results.push(TargetOpcode::COPY, ResTy, {Res});
2659 Done = scalarizeShuffle(Mask,
SDLoc(
N), ResTy, Vec0, Vec1,
N);
2664 dbgs() <<
"Unhandled shuffle:\n";
2673 MVT Ty =
N->getValueType(0).getSimpleVT();
2680 unsigned S = CN->getZExtValue() %
HST.getVectorLength();
2684 NewN =
DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2685 {VecV, VecV, getConst32(S, dl)});
2690 NewN =
DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2692 ISel.ReplaceNode(
N, NewN);
2700 N->getValueType(0), {Vv, Vu, Rt});
2701 ISel.ReplaceNode(
N, NewN);
2702 DAG.RemoveDeadNode(
N);
2705void HexagonDAGToDAGISel::PreprocessHvxISelDAG() {
2706 auto getNodes = [
this]() -> std::vector<SDNode *> {
2707 std::vector<SDNode *>
T;
2708 T.reserve(CurDAG->allnodes_size());
2709 for (
SDNode &
N : CurDAG->allnodes())
2714 ppHvxShuffleOfShuffle(getNodes());
2719 return std::hash<const void *>()(V.getNode()) +
2720 std::hash<unsigned>()(V.getResNo());
2724void HexagonDAGToDAGISel::ppHvxShuffleOfShuffle(std::vector<SDNode *> &&Nodes) {
2735 unsigned HwLen = HST->getVectorLength();
2737 struct SubVectorInfo {
2738 SubVectorInfo(
SDValue S,
unsigned H) : Src(S), HalfIdx(
H) {}
2743 using MapType = std::unordered_map<SDValue, unsigned>;
2745 auto getMaskElt = [&](
unsigned Idx, ShuffleVectorSDNode *Shuff0,
2746 ShuffleVectorSDNode *Shuff1,
2747 const MapType &OpMap) ->
int {
2754 ShuffleVectorSDNode *OpShuff = Idx < HwLen ? Shuff0 : Shuff1;
2763 auto N =
static_cast<unsigned>(MaybeN);
2764 unsigned SrcBase =
N < HwLen ? OpMap.at(OpShuff->
getOperand(0))
2777 ArrayRef<int> TopMask =
This->getMask();
2780 assert(TopMask.
size() == S0->getMask().size() &&
2781 TopMask.
size() ==
S1->getMask().size());
2785 for (
unsigned I = 0;
I != HwLen; ++
I) {
2786 int MaybeM = TopMask[
I];
2789 getMaskElt(
static_cast<unsigned>(MaybeM), S0,
S1, OpMap);
2795 std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1);
2800 const SDLoc &dl(TopShuff);
2810 auto getSourceInfo = [](
SDValue V) -> std::optional<SubVectorInfo> {
2811 while (
V.getOpcode() == ISD::BITCAST)
2812 V =
V.getOperand(0);
2814 return std::nullopt;
2815 return SubVectorInfo(
V.getOperand(0),
2819 for (SDNode *
N : Nodes) {
2822 EVT ResTy =
N->getValueType(0);
2840 if (!V0A.has_value())
2843 if (!V0B.has_value() || V0B->Src != V0A->Src)
2846 if (!V1A.has_value() || V1A->Src != V0A->Src)
2849 if (!V1B.has_value() || V1B->Src != V0A->Src)
2854 assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen);
2867void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode *
N) {
2868 HvxSelector(*
this, *CurDAG).selectExtractSubvector(
N);
2871void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *
N) {
2872 HvxSelector(*
this, *CurDAG).selectShuffle(
N);
2875void HexagonDAGToDAGISel::SelectHvxRor(SDNode *
N) {
2876 HvxSelector(*
this, *CurDAG).selectRor(
N);
2879void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *
N) {
2880 HvxSelector(*
this, *CurDAG).selectVAlign(
N);
2887 SDValue Predicate =
N->getOperand(3);
2889 SDValue Modifier =
N->getOperand(5);
2891 SDValue ImmOperand =
CurDAG->getTargetConstant(0, dl, MVT::i32);
2894 unsigned IntNo =
N->getConstantOperandVal(1);
2898 case Intrinsic::hexagon_V6_vgathermhq:
2899 case Intrinsic::hexagon_V6_vgathermhq_128B:
2900 Opcode = Hexagon::V6_vgathermhq_pseudo;
2902 case Intrinsic::hexagon_V6_vgathermwq:
2903 case Intrinsic::hexagon_V6_vgathermwq_128B:
2904 Opcode = Hexagon::V6_vgathermwq_pseudo;
2906 case Intrinsic::hexagon_V6_vgathermhwq:
2907 case Intrinsic::hexagon_V6_vgathermhwq_128B:
2908 Opcode = Hexagon::V6_vgathermhwq_pseudo;
2928 SDValue Modifier =
N->getOperand(4);
2930 SDValue ImmOperand =
CurDAG->getTargetConstant(0, dl, MVT::i32);
2933 unsigned IntNo =
N->getConstantOperandVal(1);
2937 case Intrinsic::hexagon_V6_vgathermh:
2938 case Intrinsic::hexagon_V6_vgathermh_128B:
2939 Opcode = Hexagon::V6_vgathermh_pseudo;
2941 case Intrinsic::hexagon_V6_vgathermw:
2942 case Intrinsic::hexagon_V6_vgathermw_128B:
2943 Opcode = Hexagon::V6_vgathermw_pseudo;
2945 case Intrinsic::hexagon_V6_vgathermhw:
2946 case Intrinsic::hexagon_V6_vgathermhw_128B:
2947 Opcode = Hexagon::V6_vgathermhw_pseudo;
2962 unsigned IID =
N->getConstantOperandVal(0);
2965 case Intrinsic::hexagon_V6_vaddcarry: {
2966 std::array<SDValue, 3>
Ops = {
2967 {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3)}};
2969 Result =
CurDAG->getMachineNode(Hexagon::V6_vaddcarry,
SDLoc(
N), VTs,
Ops);
2972 case Intrinsic::hexagon_V6_vaddcarry_128B: {
2973 std::array<SDValue, 3>
Ops = {
2974 {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3)}};
2976 Result =
CurDAG->getMachineNode(Hexagon::V6_vaddcarry,
SDLoc(
N), VTs,
Ops);
2979 case Intrinsic::hexagon_V6_vsubcarry: {
2980 std::array<SDValue, 3>
Ops = {
2981 {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3)}};
2983 Result =
CurDAG->getMachineNode(Hexagon::V6_vsubcarry,
SDLoc(
N), VTs,
Ops);
2986 case Intrinsic::hexagon_V6_vsubcarry_128B: {
2987 std::array<SDValue, 3>
Ops = {
2988 {
N->getOperand(1),
N->getOperand(2),
N->getOperand(3)}};
2990 Result =
CurDAG->getMachineNode(Hexagon::V6_vsubcarry,
SDLoc(
N), VTs,
Ops);
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
ReachingDefInfo InstSet InstSet & Ignore
Function Alias Analysis Results
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
This file implements the BitVector class.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
#define LLVM_ATTRIBUTE_UNUSED
const HexagonInstrInfo * TII
static bool isIdentity(ArrayRef< int > Mask)
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
static bool isLowHalfOnly(ArrayRef< int > Mask)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
static bool isUndef(const MachineInstr &MI)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
static LLVM_ATTRIBUTE_ALWAYS_INLINE MVT::SimpleValueType getSimpleVT(const unsigned char *MatcherTable, unsigned &MatcherIndex)
getSimpleVT - Decode a value in MatcherTable, if it's a VBR encoded value, use GetVBR to decode it.
This file implements a set that has insertion order iteration characteristics.
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
size_t size() const
size - Get the array size.
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
iterator_range< const_set_bits_iterator > set_bits() const
iterator find(const_arg_type_t< KeyT > Val)
Tagged union holding either a T or a Error.
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
void SelectV65GatherPred(SDNode *N)
void SelectV65Gather(SDNode *N)
void SelectHVXDualOutput(SDNode *N)
unsigned getVectorNumElements() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
A description of a memory reference used in the backend.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI void dumpr() const
Dump (recursively) this node and its use-def subgraph.
const SDValue & getOperand(unsigned Num) const
LLVM_ABI void print(raw_ostream &OS, const SelectionDAG *G=nullptr) const
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
void insert_range(Range &&R)
Vector takeVector()
Clear the SetVector and return the underlying vector.
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
bool empty() const
Determine if the SetVector is empty or not.
bool insert(const value_type &X)
Insert a new element into the SetVector.
int getMaskElt(unsigned Idx) const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< NodeBase * > Node
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
auto reverse(ContainerTy &&C)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
void sort(IteratorTy Start, IteratorTy End)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
FunctionAddr VTableAddr Count
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
@ Sub
Subtraction of integers.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
int popcount(T Value) noexcept
Count the number of set bits in a value.
MaskT vshuffvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
MaskT vpack(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > hi(ArrayRef< int > Vuu)
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdealb4w(ArrayRef< int > Vu, ArrayRef< int > Vv)
SmallVector< int, 128 > MaskT
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > lo(ArrayRef< int > Vuu)
MaskT vdealvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
EVT getVectorElementType() const
Given a vector type, return the type of each element.
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
MVT getSingleVT(MVT ElemTy) const
static SmallVector< uint32_t, 8 > completeToPerfect(ArrayRef< uint32_t > Completions, unsigned Width)
HexagonDAGToDAGISel & ISel
const HexagonTargetLowering & Lower
void selectVAlign(SDNode *N)
void selectExtractSubvector(SDNode *N)
void selectRor(SDNode *N)
void selectShuffle(SDNode *N)
static SmallVector< uint32_t, 8 > getPerfectCompletions(ShuffleMask SM, unsigned Width)
MVT getPairVT(MVT ElemTy) const
const HexagonSubtarget & HST
static std::optional< int > rotationDistance(ShuffleMask SM, unsigned WrapAt)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
std::size_t operator()(SDValue V) const