LLVM 19.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Hexagon.h"
10#include "HexagonISelDAGToDAG.h"
11#include "HexagonISelLowering.h"
13#include "llvm/ADT/BitVector.h"
14#include "llvm/ADT/SetVector.h"
17#include "llvm/IR/Intrinsics.h"
18#include "llvm/IR/IntrinsicsHexagon.h"
20#include "llvm/Support/Debug.h"
22
23#include <algorithm>
24#include <cmath>
25#include <deque>
26#include <functional>
27#include <map>
28#include <optional>
29#include <set>
30#include <unordered_map>
31#include <utility>
32#include <vector>
33
34#define DEBUG_TYPE "hexagon-isel"
35using namespace llvm;
36
37namespace {
38
39// --------------------------------------------------------------------
40// Implementation of permutation networks.
41
42// Implementation of the node routing through butterfly networks:
43// - Forward delta.
44// - Reverse delta.
45// - Benes.
46//
47//
48// Forward delta network consists of log(N) steps, where N is the number
49// of inputs. In each step, an input can stay in place, or it can get
50// routed to another position[1]. The step after that consists of two
51// networks, each half in size in terms of the number of nodes. In those
52// terms, in the given step, an input can go to either the upper or the
53// lower network in the next step.
54//
55// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
56// positions as long as there is no conflict.
57
58// Here's a delta network for 8 inputs, only the switching routes are
59// shown:
60//
61// Steps:
62// |- 1 ---------------|- 2 -----|- 3 -|
63//
64// Inp[0] *** *** *** *** Out[0]
65// \ / \ / \ /
66// \ / \ / X
67// \ / \ / / \
68// Inp[1] *** \ / *** X *** *** Out[1]
69// \ \ / / \ / \ /
70// \ \ / / X X
71// \ \ / / / \ / \
72// Inp[2] *** \ \ / / *** X *** *** Out[2]
73// \ \ X / / / \ \ /
74// \ \ / \ / / / \ X
75// \ X X / / \ / \
76// Inp[3] *** \ / \ / \ / *** *** *** Out[3]
77// \ X X X /
78// \ / \ / \ / \ /
79// X X X X
80// / \ / \ / \ / \
81// / X X X \
82// Inp[4] *** / \ / \ / \ *** *** *** Out[4]
83// / X X \ \ / \ /
84// / / \ / \ \ \ / X
85// / / X \ \ \ / / \
86// Inp[5] *** / / \ \ *** X *** *** Out[5]
87// / / \ \ \ / \ /
88// / / \ \ X X
89// / / \ \ / \ / \
90// Inp[6] *** / \ *** X *** *** Out[6]
91// / \ / \ \ /
92// / \ / \ X
93// / \ / \ / \
94// Inp[7] *** *** *** *** Out[7]
95//
96//
97// Reverse delta network is same as delta network, with the steps in
98// the opposite order.
99//
100//
101// Benes network is a forward delta network immediately followed by
102// a reverse delta network.
103
104enum class ColorKind { None, Red, Black };
105
106// Graph coloring utility used to partition nodes into two groups:
107// they will correspond to nodes routed to the upper and lower networks.
108struct Coloring {
109 using Node = int;
110 using MapType = std::map<Node, ColorKind>;
111 static constexpr Node Ignore = Node(-1);
112
113 Coloring(ArrayRef<Node> Ord) : Order(Ord) {
114 build();
115 if (!color())
116 Colors.clear();
117 }
118
119 const MapType &colors() const {
120 return Colors;
121 }
122
123 ColorKind other(ColorKind Color) {
124 if (Color == ColorKind::None)
125 return ColorKind::Red;
126 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
127 }
128
129 LLVM_DUMP_METHOD void dump() const;
130
131private:
132 ArrayRef<Node> Order;
133 MapType Colors;
134 std::set<Node> Needed;
135
136 using NodeSet = std::set<Node>;
137 std::map<Node,NodeSet> Edges;
138
139 Node conj(Node Pos) {
140 Node Num = Order.size();
141 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
142 }
143
144 ColorKind getColor(Node N) {
145 auto F = Colors.find(N);
146 return F != Colors.end() ? F->second : ColorKind::None;
147 }
148
149 std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
150
151 void build();
152 bool color();
153};
154} // namespace
155
156std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
157 auto Color = ColorKind::None;
158 for (Node N : Nodes) {
159 ColorKind ColorN = getColor(N);
160 if (ColorN == ColorKind::None)
161 continue;
162 if (Color == ColorKind::None)
163 Color = ColorN;
164 else if (Color != ColorKind::None && Color != ColorN)
165 return { false, ColorKind::None };
166 }
167 return { true, Color };
168}
169
170void Coloring::build() {
171 // Add Order[P] and Order[conj(P)] to Edges.
172 for (unsigned P = 0; P != Order.size(); ++P) {
173 Node I = Order[P];
174 if (I != Ignore) {
175 Needed.insert(I);
176 Node PC = Order[conj(P)];
177 if (PC != Ignore && PC != I)
178 Edges[I].insert(PC);
179 }
180 }
181 // Add I and conj(I) to Edges.
182 for (unsigned I = 0; I != Order.size(); ++I) {
183 if (!Needed.count(I))
184 continue;
185 Node C = conj(I);
186 // This will create an entry in the edge table, even if I is not
187 // connected to any other node. This is necessary, because it still
188 // needs to be colored.
189 NodeSet &Is = Edges[I];
190 if (Needed.count(C))
191 Is.insert(C);
192 }
193}
194
195bool Coloring::color() {
196 SetVector<Node> FirstQ;
197 auto Enqueue = [this,&FirstQ] (Node N) {
199 Q.insert(N);
200 for (unsigned I = 0; I != Q.size(); ++I) {
201 NodeSet &Ns = Edges[Q[I]];
202 Q.insert(Ns.begin(), Ns.end());
203 }
204 FirstQ.insert(Q.begin(), Q.end());
205 };
206 for (Node N : Needed)
207 Enqueue(N);
208
209 for (Node N : FirstQ) {
210 if (Colors.count(N))
211 continue;
212 NodeSet &Ns = Edges[N];
213 auto P = getUniqueColor(Ns);
214 if (!P.first)
215 return false;
216 Colors[N] = other(P.second);
217 }
218
219 // First, color nodes that don't have any dups.
220 for (auto E : Edges) {
221 Node N = E.first;
222 if (!Needed.count(conj(N)) || Colors.count(N))
223 continue;
224 auto P = getUniqueColor(E.second);
225 if (!P.first)
226 return false;
227 Colors[N] = other(P.second);
228 }
229
230 // Now, nodes that are still uncolored. Since the graph can be modified
231 // in this step, create a work queue.
232 std::vector<Node> WorkQ;
233 for (auto E : Edges) {
234 Node N = E.first;
235 if (!Colors.count(N))
236 WorkQ.push_back(N);
237 }
238
239 for (Node N : WorkQ) {
240 NodeSet &Ns = Edges[N];
241 auto P = getUniqueColor(Ns);
242 if (P.first) {
243 Colors[N] = other(P.second);
244 continue;
245 }
246
247 // Coloring failed. Split this node.
248 Node C = conj(N);
249 ColorKind ColorN = other(ColorKind::None);
250 ColorKind ColorC = other(ColorN);
251 NodeSet &Cs = Edges[C];
252 NodeSet CopyNs = Ns;
253 for (Node M : CopyNs) {
254 ColorKind ColorM = getColor(M);
255 if (ColorM == ColorC) {
256 // Connect M with C, disconnect M from N.
257 Cs.insert(M);
258 Edges[M].insert(C);
259 Ns.erase(M);
260 Edges[M].erase(N);
261 }
262 }
263 Colors[N] = ColorN;
264 Colors[C] = ColorC;
265 }
266
267 // Explicitly assign "None" to all uncolored nodes.
268 for (unsigned I = 0; I != Order.size(); ++I)
269 if (Colors.count(I) == 0)
270 Colors[I] = ColorKind::None;
271
272 return true;
273}
274
275#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
276void Coloring::dump() const {
277 dbgs() << "{ Order: {";
278 for (Node P : Order) {
279 if (P != Ignore)
280 dbgs() << ' ' << P;
281 else
282 dbgs() << " -";
283 }
284 dbgs() << " }\n";
285 dbgs() << " Needed: {";
286 for (Node N : Needed)
287 dbgs() << ' ' << N;
288 dbgs() << " }\n";
289
290 dbgs() << " Edges: {\n";
291 for (auto E : Edges) {
292 dbgs() << " " << E.first << " -> {";
293 for (auto N : E.second)
294 dbgs() << ' ' << N;
295 dbgs() << " }\n";
296 }
297 dbgs() << " }\n";
298
299 auto ColorKindToName = [](ColorKind C) {
300 switch (C) {
301 case ColorKind::None:
302 return "None";
303 case ColorKind::Red:
304 return "Red";
305 case ColorKind::Black:
306 return "Black";
307 }
308 llvm_unreachable("all ColorKinds should be handled by the switch above");
309 };
310
311 dbgs() << " Colors: {\n";
312 for (auto C : Colors)
313 dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
314 dbgs() << " }\n}\n";
315}
316#endif
317
318namespace {
319// Base class of for reordering networks. They don't strictly need to be
320// permutations, as outputs with repeated occurrences of an input element
321// are allowed.
322struct PermNetwork {
323 using Controls = std::vector<uint8_t>;
324 using ElemType = int;
325 static constexpr ElemType Ignore = ElemType(-1);
326
327 enum : uint8_t {
328 None,
329 Pass,
330 Switch
331 };
332 enum : uint8_t {
333 Forward,
334 Reverse
335 };
336
337 PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
338 Order.assign(Ord.data(), Ord.data()+Ord.size());
339 Log = 0;
340
341 unsigned S = Order.size();
342 while (S >>= 1)
343 ++Log;
344
345 Table.resize(Order.size());
346 for (RowType &Row : Table)
347 Row.resize(Mult*Log, None);
348 }
349
350 void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
351 unsigned Size = Order.size();
352 V.resize(Size);
353 for (unsigned I = 0; I != Size; ++I) {
354 unsigned W = 0;
355 for (unsigned L = 0; L != Log; ++L) {
356 unsigned C = ctl(I, StartAt+L) == Switch;
357 if (Dir == Forward)
358 W |= C << (Log-1-L);
359 else
360 W |= C << L;
361 }
362 assert(isUInt<8>(W));
363 V[I] = uint8_t(W);
364 }
365 }
366
367 uint8_t ctl(ElemType Pos, unsigned Step) const {
368 return Table[Pos][Step];
369 }
370 unsigned size() const {
371 return Order.size();
372 }
373 unsigned steps() const {
374 return Log;
375 }
376
377protected:
378 unsigned Log;
379 std::vector<ElemType> Order;
380 using RowType = std::vector<uint8_t>;
381 std::vector<RowType> Table;
382};
383
384struct ForwardDeltaNetwork : public PermNetwork {
385 ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
386
387 bool run(Controls &V) {
388 if (!route(Order.data(), Table.data(), size(), 0))
389 return false;
390 getControls(V, 0, Forward);
391 return true;
392 }
393
394private:
395 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
396};
397
398struct ReverseDeltaNetwork : public PermNetwork {
399 ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
400
401 bool run(Controls &V) {
402 if (!route(Order.data(), Table.data(), size(), 0))
403 return false;
404 getControls(V, 0, Reverse);
405 return true;
406 }
407
408private:
409 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
410};
411
412struct BenesNetwork : public PermNetwork {
413 BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
414
415 bool run(Controls &F, Controls &R) {
416 if (!route(Order.data(), Table.data(), size(), 0))
417 return false;
418
419 getControls(F, 0, Forward);
420 getControls(R, Log, Reverse);
421 return true;
422 }
423
424private:
425 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
426};
427} // namespace
428
429bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
430 unsigned Step) {
431 bool UseUp = false, UseDown = false;
432 ElemType Num = Size;
433
434 // Cannot use coloring here, because coloring is used to determine
435 // the "big" switch, i.e. the one that changes halves, and in a forward
436 // network, a color can be simultaneously routed to both halves in the
437 // step we're working on.
438 for (ElemType J = 0; J != Num; ++J) {
439 ElemType I = P[J];
440 // I is the position in the input,
441 // J is the position in the output.
442 if (I == Ignore)
443 continue;
444 uint8_t S;
445 if (I < Num/2)
446 S = (J < Num/2) ? Pass : Switch;
447 else
448 S = (J < Num/2) ? Switch : Pass;
449
450 // U is the element in the table that needs to be updated.
451 ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
452 if (U < Num/2)
453 UseUp = true;
454 else
455 UseDown = true;
456 if (T[U][Step] != S && T[U][Step] != None)
457 return false;
458 T[U][Step] = S;
459 }
460
461 for (ElemType J = 0; J != Num; ++J)
462 if (P[J] != Ignore && P[J] >= Num/2)
463 P[J] -= Num/2;
464
465 if (Step+1 < Log) {
466 if (UseUp && !route(P, T, Size/2, Step+1))
467 return false;
468 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
469 return false;
470 }
471 return true;
472}
473
474bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
475 unsigned Step) {
476 unsigned Pets = Log-1 - Step;
477 bool UseUp = false, UseDown = false;
478 ElemType Num = Size;
479
480 // In this step half-switching occurs, so coloring can be used.
481 Coloring G({P,Size});
482 const Coloring::MapType &M = G.colors();
483 if (M.empty())
484 return false;
485
486 ColorKind ColorUp = ColorKind::None;
487 for (ElemType J = 0; J != Num; ++J) {
488 ElemType I = P[J];
489 // I is the position in the input,
490 // J is the position in the output.
491 if (I == Ignore)
492 continue;
493 ColorKind C = M.at(I);
494 if (C == ColorKind::None)
495 continue;
496 // During "Step", inputs cannot switch halves, so if the "up" color
497 // is still unknown, make sure that it is selected in such a way that
498 // "I" will stay in the same half.
499 bool InpUp = I < Num/2;
500 if (ColorUp == ColorKind::None)
501 ColorUp = InpUp ? C : G.other(C);
502 if ((C == ColorUp) != InpUp) {
503 // If I should go to a different half than where is it now, give up.
504 return false;
505 }
506
507 uint8_t S;
508 if (InpUp) {
509 S = (J < Num/2) ? Pass : Switch;
510 UseUp = true;
511 } else {
512 S = (J < Num/2) ? Switch : Pass;
513 UseDown = true;
514 }
515 T[J][Pets] = S;
516 }
517
518 // Reorder the working permutation according to the computed switch table
519 // for the last step (i.e. Pets).
520 for (ElemType J = 0, E = Size / 2; J != E; ++J) {
521 ElemType PJ = P[J]; // Current values of P[J]
522 ElemType PC = P[J+Size/2]; // and P[conj(J)]
523 ElemType QJ = PJ; // New values of P[J]
524 ElemType QC = PC; // and P[conj(J)]
525 if (T[J][Pets] == Switch)
526 QC = PJ;
527 if (T[J+Size/2][Pets] == Switch)
528 QJ = PC;
529 P[J] = QJ;
530 P[J+Size/2] = QC;
531 }
532
533 for (ElemType J = 0; J != Num; ++J)
534 if (P[J] != Ignore && P[J] >= Num/2)
535 P[J] -= Num/2;
536
537 if (Step+1 < Log) {
538 if (UseUp && !route(P, T, Size/2, Step+1))
539 return false;
540 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
541 return false;
542 }
543 return true;
544}
545
546bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
547 unsigned Step) {
548 Coloring G({P,Size});
549 const Coloring::MapType &M = G.colors();
550 if (M.empty())
551 return false;
552 ElemType Num = Size;
553
554 unsigned Pets = 2*Log-1 - Step;
555 bool UseUp = false, UseDown = false;
556
557 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
558 // result in different controls. Let's pick the one where the first
559 // control will be "Pass".
560 ColorKind ColorUp = ColorKind::None;
561 for (ElemType J = 0; J != Num; ++J) {
562 ElemType I = P[J];
563 if (I == Ignore)
564 continue;
565 ColorKind C = M.at(I);
566 if (C == ColorKind::None)
567 continue;
568 if (ColorUp == ColorKind::None) {
569 ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
570 }
571 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
572 if (C == ColorUp) {
573 if (I < Num/2)
574 T[I][Step] = Pass;
575 else
576 T[CI][Step] = Switch;
577 T[J][Pets] = (J < Num/2) ? Pass : Switch;
578 UseUp = true;
579 } else { // Down
580 if (I < Num/2)
581 T[CI][Step] = Switch;
582 else
583 T[I][Step] = Pass;
584 T[J][Pets] = (J < Num/2) ? Switch : Pass;
585 UseDown = true;
586 }
587 }
588
589 // Reorder the working permutation according to the computed switch table
590 // for the last step (i.e. Pets).
591 for (ElemType J = 0; J != Num/2; ++J) {
592 ElemType PJ = P[J]; // Current values of P[J]
593 ElemType PC = P[J+Num/2]; // and P[conj(J)]
594 ElemType QJ = PJ; // New values of P[J]
595 ElemType QC = PC; // and P[conj(J)]
596 if (T[J][Pets] == Switch)
597 QC = PJ;
598 if (T[J+Num/2][Pets] == Switch)
599 QJ = PC;
600 P[J] = QJ;
601 P[J+Num/2] = QC;
602 }
603
604 for (ElemType J = 0; J != Num; ++J)
605 if (P[J] != Ignore && P[J] >= Num/2)
606 P[J] -= Num/2;
607
608 if (Step+1 < Log) {
609 if (UseUp && !route(P, T, Size/2, Step+1))
610 return false;
611 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
612 return false;
613 }
614 return true;
615}
616
617// --------------------------------------------------------------------
618// Support for building selection results (output instructions that are
619// parts of the final selection).
620
621namespace {
622struct OpRef {
623 OpRef(SDValue V) : OpV(V) {}
624 bool isValue() const { return OpV.getNode() != nullptr; }
625 bool isValid() const { return isValue() || !(OpN & Invalid); }
626 bool isUndef() const { return OpN & Undef; }
627 static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
628 static OpRef fail() { return OpRef(Invalid); }
629
630 static OpRef lo(const OpRef &R) {
631 assert(!R.isValue());
632 return OpRef(R.OpN & (Undef | Index | LoHalf));
633 }
634 static OpRef hi(const OpRef &R) {
635 assert(!R.isValue());
636 return OpRef(R.OpN & (Undef | Index | HiHalf));
637 }
638 static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
639
640 // Direct value.
641 SDValue OpV = SDValue();
642
643 // Reference to the operand of the input node:
644 // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
645 // operand index:
646 // If bit 30 is set, it's the high half of the operand.
647 // If bit 29 is set, it's the low half of the operand.
648 unsigned OpN = 0;
649
650 enum : unsigned {
651 Invalid = 0x10000000,
652 LoHalf = 0x20000000,
653 HiHalf = 0x40000000,
654 Whole = LoHalf | HiHalf,
655 Undef = 0x80000000,
656 Index = 0x0FFFFFFF, // Mask of the index value.
657 IndexBits = 28,
658 };
659
661 void print(raw_ostream &OS, const SelectionDAG &G) const;
662
663private:
664 OpRef(unsigned N) : OpN(N) {}
665};
666
667struct NodeTemplate {
668 NodeTemplate() = default;
669 unsigned Opc = 0;
670 MVT Ty = MVT::Other;
671 std::vector<OpRef> Ops;
672
673 LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
674};
675
676struct ResultStack {
677 ResultStack(SDNode *Inp)
678 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
679 SDNode *InpNode;
680 MVT InpTy;
681 unsigned push(const NodeTemplate &Res) {
682 List.push_back(Res);
683 return List.size()-1;
684 }
685 unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
686 NodeTemplate Res;
687 Res.Opc = Opc;
688 Res.Ty = Ty;
689 Res.Ops = Ops;
690 return push(Res);
691 }
692 bool empty() const { return List.empty(); }
693 unsigned size() const { return List.size(); }
694 unsigned top() const { return size()-1; }
695 const NodeTemplate &operator[](unsigned I) const { return List[I]; }
696 unsigned reset(unsigned NewTop) {
697 List.resize(NewTop+1);
698 return NewTop;
699 }
700
701 using BaseType = std::vector<NodeTemplate>;
702 BaseType::iterator begin() { return List.begin(); }
703 BaseType::iterator end() { return List.end(); }
704 BaseType::const_iterator begin() const { return List.begin(); }
705 BaseType::const_iterator end() const { return List.end(); }
706
708
710 void print(raw_ostream &OS, const SelectionDAG &G) const;
711};
712} // namespace
713
714#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
715void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
716 if (isValue()) {
717 OpV.getNode()->print(OS, &G);
718 return;
719 }
720 if (OpN & Invalid) {
721 OS << "invalid";
722 return;
723 }
724 if (OpN & Undef) {
725 OS << "undef";
726 return;
727 }
728 if ((OpN & Whole) != Whole) {
729 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
730 if (OpN & LoHalf)
731 OS << "lo ";
732 else
733 OS << "hi ";
734 }
735 OS << '#' << SignExtend32(OpN & Index, IndexBits);
736}
737
738void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
739 const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
740 OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
741 << TII.getName(Opc);
742 bool Comma = false;
743 for (const auto &R : Ops) {
744 if (Comma)
745 OS << ',';
746 Comma = true;
747 OS << ' ';
748 R.print(OS, G);
749 }
750}
751
752void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
753 OS << "Input node:\n";
754#ifndef NDEBUG
755 InpNode->dumpr(&G);
756#endif
757 OS << "Result templates:\n";
758 for (unsigned I = 0, E = List.size(); I != E; ++I) {
759 OS << '[' << I << "] ";
760 List[I].print(OS, G);
761 OS << '\n';
762 }
763}
764#endif
765
766namespace {
767struct ShuffleMask {
768 ShuffleMask(ArrayRef<int> M) : Mask(M) {
769 for (int M : Mask) {
770 if (M == -1)
771 continue;
772 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
773 MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
774 }
775 }
776
778 int MinSrc = -1, MaxSrc = -1;
779
780 ShuffleMask lo() const {
781 size_t H = Mask.size()/2;
782 return ShuffleMask(Mask.take_front(H));
783 }
784 ShuffleMask hi() const {
785 size_t H = Mask.size()/2;
786 return ShuffleMask(Mask.take_back(H));
787 }
788
789 void print(raw_ostream &OS) const {
790 OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
791 for (int M : Mask)
792 OS << ' ' << M;
793 OS << " }";
794 }
795};
796
798raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
799 SM.print(OS);
800 return OS;
801}
802} // namespace
803
804namespace shuffles {
806// Vdd = vshuffvdd(Vu, Vv, Rt)
807// Vdd = vdealvdd(Vu, Vv, Rt)
808// Vd = vpack(Vu, Vv, Size, TakeOdd)
809// Vd = vshuff(Vu, Vv, Size, TakeOdd)
810// Vd = vdeal(Vu, Vv, Size, TakeOdd)
811// Vd = vdealb4w(Vu, Vv)
812
813ArrayRef<int> lo(ArrayRef<int> Vuu) { return Vuu.take_front(Vuu.size() / 2); }
814ArrayRef<int> hi(ArrayRef<int> Vuu) { return Vuu.take_back(Vuu.size() / 2); }
815
817 int Len = Vu.size();
818 MaskT Vdd(2 * Len);
819 std::copy(Vv.begin(), Vv.end(), Vdd.begin());
820 std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
821
822 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
823 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
824
825 for (int Offset = 1; Offset < Len; Offset *= 2) {
826 if ((Rt & Offset) == 0)
827 continue;
828 for (int i = 0; i != Len; ++i) {
829 if ((i & Offset) == 0)
830 std::swap(Vd1[i], Vd0[i + Offset]);
831 }
832 }
833 return Vdd;
834}
835
837 int Len = Vu.size();
838 MaskT Vdd(2 * Len);
839 std::copy(Vv.begin(), Vv.end(), Vdd.begin());
840 std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
841
842 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
843 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
844
845 for (int Offset = Len / 2; Offset > 0; Offset /= 2) {
846 if ((Rt & Offset) == 0)
847 continue;
848 for (int i = 0; i != Len; ++i) {
849 if ((i & Offset) == 0)
850 std::swap(Vd1[i], Vd0[i + Offset]);
851 }
852 }
853 return Vdd;
854}
855
856MaskT vpack(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
857 int Len = Vu.size();
858 MaskT Vd(Len);
859 auto Odd = static_cast<int>(TakeOdd);
860 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
861 for (int b = 0; b != static_cast<int>(Size); ++b) {
862 // clang-format off
863 Vd[i * Size + b] = Vv[(2 * i + Odd) * Size + b];
864 Vd[i * Size + b + Len / 2] = Vu[(2 * i + Odd) * Size + b];
865 // clang-format on
866 }
867 }
868 return Vd;
869}
870
871MaskT vshuff(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
872 int Len = Vu.size();
873 MaskT Vd(Len);
874 auto Odd = static_cast<int>(TakeOdd);
875 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
876 for (int b = 0; b != static_cast<int>(Size); ++b) {
877 Vd[(2 * i + 0) * Size + b] = Vv[(2 * i + Odd) * Size + b];
878 Vd[(2 * i + 1) * Size + b] = Vu[(2 * i + Odd) * Size + b];
879 }
880 }
881 return Vd;
882}
883
884MaskT vdeal(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
885 int Len = Vu.size();
886 MaskT T = vdealvdd(Vu, Vv, Len - 2 * Size);
887 return vpack(hi(T), lo(T), Size, TakeOdd);
888}
889
891 int Len = Vu.size();
892 MaskT Vd(Len);
893 for (int i = 0, e = Len / 4; i != e; ++i) {
894 Vd[0 * (Len / 4) + i] = Vv[4 * i + 0];
895 Vd[1 * (Len / 4) + i] = Vv[4 * i + 2];
896 Vd[2 * (Len / 4) + i] = Vu[4 * i + 0];
897 Vd[3 * (Len / 4) + i] = Vu[4 * i + 2];
898 }
899 return Vd;
900}
901
902template <typename ShuffFunc, typename... OptArgs>
903auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT {
904 MaskT Vu(Length), Vv(Length);
905 std::iota(Vu.begin(), Vu.end(), Length); // High
906 std::iota(Vv.begin(), Vv.end(), 0); // Low
907 return S(Vu, Vv, args...);
908}
909
910} // namespace shuffles
911
912// --------------------------------------------------------------------
913// The HvxSelector class.
914
916 return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
917}
919 return G.getSubtarget<HexagonSubtarget>();
920}
921
922namespace llvm {
923 struct HvxSelector {
928 const unsigned HwLen;
929
931 : Lower(getHexagonLowering(G)), ISel(HS), DAG(G),
932 HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
933
934 MVT getSingleVT(MVT ElemTy) const {
935 assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
936 unsigned NumElems = HwLen / (ElemTy.getSizeInBits() / 8);
937 return MVT::getVectorVT(ElemTy, NumElems);
938 }
939
940 MVT getPairVT(MVT ElemTy) const {
941 assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
942 unsigned NumElems = (2 * HwLen) / (ElemTy.getSizeInBits() / 8);
943 return MVT::getVectorVT(ElemTy, NumElems);
944 }
945
946 MVT getBoolVT() const {
947 // Return HwLen x i1.
948 return MVT::getVectorVT(MVT::i1, HwLen);
949 }
950
952 void selectShuffle(SDNode *N);
953 void selectRor(SDNode *N);
954 void selectVAlign(SDNode *N);
955
957 unsigned Width);
959 ArrayRef<uint32_t> Completions, unsigned Width);
960 static std::optional<int> rotationDistance(ShuffleMask SM, unsigned WrapAt);
961
962 private:
963 void select(SDNode *ISelN);
964 void materialize(const ResultStack &Results);
965
966 SDValue getConst32(int Val, const SDLoc &dl);
967 SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
968
969 enum : unsigned {
970 None,
971 PackMux,
972 };
973 OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
974 OpRef funnels(OpRef Va, OpRef Vb, int Amount, ResultStack &Results);
975
976 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
977 MutableArrayRef<int> NewMask, unsigned Options = None);
978 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
979 MutableArrayRef<int> NewMask);
980 OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
981 ResultStack &Results);
982 OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
983 ResultStack &Results);
984
985 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
986 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
987 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
988 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
989
990 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
991 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
992 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
993 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
994
995 bool selectVectorConstants(SDNode *N);
996 bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
997 SDValue Va, SDValue Vb, SDNode *N);
998 };
999} // namespace llvm
1000
1002 MutableArrayRef<int> MaskR) {
1003 unsigned VecLen = Mask.size();
1004 assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
1005 for (unsigned I = 0; I != VecLen; ++I) {
1006 int M = Mask[I];
1007 if (M < 0) {
1008 MaskL[I] = MaskR[I] = -1;
1009 } else if (unsigned(M) < VecLen) {
1010 MaskL[I] = M;
1011 MaskR[I] = -1;
1012 } else {
1013 MaskL[I] = -1;
1014 MaskR[I] = M-VecLen;
1015 }
1016 }
1017}
1018
1019static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
1020 unsigned MaxLen) {
1021 assert(A.size() > 0 && A.size() >= MaxLen);
1022 int F = A[0];
1023 int E = F;
1024 for (unsigned I = 1; I != MaxLen; ++I) {
1025 if (A[I] - E != Inc)
1026 return { F, I };
1027 E = A[I];
1028 }
1029 return { F, MaxLen };
1030}
1031
1032static bool isUndef(ArrayRef<int> Mask) {
1033 for (int Idx : Mask)
1034 if (Idx != -1)
1035 return false;
1036 return true;
1037}
1038
1039static bool isIdentity(ArrayRef<int> Mask) {
1040 for (int I = 0, E = Mask.size(); I != E; ++I) {
1041 int M = Mask[I];
1042 if (M >= 0 && M != I)
1043 return false;
1044 }
1045 return true;
1046}
1047
1048static bool isLowHalfOnly(ArrayRef<int> Mask) {
1049 int L = Mask.size();
1050 assert(L % 2 == 0);
1051 // Check if the second half of the mask is all-undef.
1052 return llvm::all_of(Mask.drop_front(L / 2), [](int M) { return M < 0; });
1053}
1054
1056 unsigned SegLen) {
1057 assert(isPowerOf2_32(SegLen));
1059 if (SM.MaxSrc == -1)
1060 return SegList;
1061
1062 unsigned Shift = Log2_32(SegLen);
1063 BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
1064
1065 for (int M : SM.Mask) {
1066 if (M >= 0)
1067 Segs.set(M >> Shift);
1068 }
1069
1070 for (unsigned B : Segs.set_bits())
1071 SegList.push_back(B);
1072 return SegList;
1073}
1074
1076 unsigned SegLen) {
1077 // Calculate the layout of the output segments in terms of the input
1078 // segments.
1079 // For example [1,3,1,0] means that the output consists of 4 output
1080 // segments, where the first output segment has only elements of the
1081 // input segment at index 1. The next output segment only has elements
1082 // of the input segment 3, etc.
1083 // If an output segment only has undef elements, the value will be ~0u.
1084 // If an output segment has elements from more than one input segment,
1085 // the corresponding value will be ~1u.
1086 unsigned MaskLen = SM.Mask.size();
1087 assert(MaskLen % SegLen == 0);
1088 SmallVector<unsigned, 4> Map(MaskLen / SegLen);
1089
1090 for (int S = 0, E = Map.size(); S != E; ++S) {
1091 unsigned Idx = ~0u;
1092 for (int I = 0; I != static_cast<int>(SegLen); ++I) {
1093 int M = SM.Mask[S*SegLen + I];
1094 if (M < 0)
1095 continue;
1096 unsigned G = M / SegLen; // Input segment of this element.
1097 if (Idx == ~0u) {
1098 Idx = G;
1099 } else if (Idx != G) {
1100 Idx = ~1u;
1101 break;
1102 }
1103 }
1104 Map[S] = Idx;
1105 }
1106
1107 return Map;
1108}
1109
1111 unsigned SegLen, MutableArrayRef<int> PackedMask) {
1113 for (int I = OutSegMap.size() - 1; I >= 0; --I) {
1114 unsigned S = OutSegMap[I];
1115 assert(S != ~0u && "Unexpected undef");
1116 assert(S != ~1u && "Unexpected multi");
1117 if (InvMap.size() <= S)
1118 InvMap.resize(S+1);
1119 InvMap[S] = I;
1120 }
1121
1122 unsigned Shift = Log2_32(SegLen);
1123 for (int I = 0, E = Mask.size(); I != E; ++I) {
1124 int M = Mask[I];
1125 if (M >= 0) {
1126 int OutIdx = InvMap[M >> Shift];
1127 M = (M & (SegLen-1)) + SegLen*OutIdx;
1128 }
1129 PackedMask[I] = M;
1130 }
1131}
1132
1133bool HvxSelector::selectVectorConstants(SDNode *N) {
1134 // Constant vectors are generated as loads from constant pools or as
1135 // splats of a constant value. Since they are generated during the
1136 // selection process, the main selection algorithm is not aware of them.
1137 // Select them directly here.
1139 SetVector<SDNode*> WorkQ;
1140
1141 // The DAG can change (due to CSE) during selection, so cache all the
1142 // unselected nodes first to avoid traversing a mutating DAG.
1143 WorkQ.insert(N);
1144 for (unsigned i = 0; i != WorkQ.size(); ++i) {
1145 SDNode *W = WorkQ[i];
1146 if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1147 Nodes.push_back(W);
1148 for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1149 WorkQ.insert(W->getOperand(j).getNode());
1150 }
1151
1152 for (SDNode *L : Nodes)
1153 select(L);
1154
1155 return !Nodes.empty();
1156}
1157
1158void HvxSelector::materialize(const ResultStack &Results) {
1159 DEBUG_WITH_TYPE("isel", {
1160 dbgs() << "Materializing\n";
1161 Results.print(dbgs(), DAG);
1162 });
1163 if (Results.empty())
1164 return;
1165 const SDLoc &dl(Results.InpNode);
1166 std::vector<SDValue> Output;
1167
1168 for (unsigned I = 0, E = Results.size(); I != E; ++I) {
1169 const NodeTemplate &Node = Results[I];
1170 std::vector<SDValue> Ops;
1171 for (const OpRef &R : Node.Ops) {
1172 assert(R.isValid());
1173 if (R.isValue()) {
1174 Ops.push_back(R.OpV);
1175 continue;
1176 }
1177 if (R.OpN & OpRef::Undef) {
1178 MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index);
1179 Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1180 continue;
1181 }
1182 // R is an index of a result.
1183 unsigned Part = R.OpN & OpRef::Whole;
1184 int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
1185 if (Idx < 0)
1186 Idx += I;
1187 assert(Idx >= 0 && unsigned(Idx) < Output.size());
1188 SDValue Op = Output[Idx];
1189 MVT OpTy = Op.getValueType().getSimpleVT();
1190 if (Part != OpRef::Whole) {
1191 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1193 OpTy.getVectorNumElements()/2);
1194 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1195 : Hexagon::vsub_hi;
1196 Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
1197 }
1198 Ops.push_back(Op);
1199 } // for (Node : Results)
1200
1201 assert(Node.Ty != MVT::Other);
1202 SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1203 ? Ops.front().getNode()
1204 : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1205 Output.push_back(SDValue(ResN, 0));
1206 }
1207
1208 SDNode *OutN = Output.back().getNode();
1209 SDNode *InpN = Results.InpNode;
1210 DEBUG_WITH_TYPE("isel", {
1211 dbgs() << "Generated node:\n";
1212 OutN->dumpr(&DAG);
1213 });
1214
1215 ISel.ReplaceNode(InpN, OutN);
1216 selectVectorConstants(OutN);
1218}
1219
1220OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
1221 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1222 const SDLoc &dl(Results.InpNode);
1223 Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1224 getConst32(Hexagon::HvxWRRegClassID, dl),
1225 Lo, getConst32(Hexagon::vsub_lo, dl),
1226 Hi, getConst32(Hexagon::vsub_hi, dl),
1227 });
1228 return OpRef::res(Results.top());
1229}
1230
1231OpRef HvxSelector::funnels(OpRef Va, OpRef Vb, int Amount,
1232 ResultStack &Results) {
1233 // Do a funnel shift towards the low end (shift right) by Amount bytes.
1234 // If Amount < 0, treat it as shift left, i.e. do a shift right by
1235 // Amount + HwLen.
1236 auto VecLen = static_cast<int>(HwLen);
1237
1238 if (Amount == 0)
1239 return Va;
1240 if (Amount == VecLen)
1241 return Vb;
1242
1243 MVT Ty = getSingleVT(MVT::i8);
1244 const SDLoc &dl(Results.InpNode);
1245
1246 if (Amount < 0)
1247 Amount += VecLen;
1248 if (Amount > VecLen) {
1249 Amount -= VecLen;
1250 std::swap(Va, Vb);
1251 }
1252
1253 if (isUInt<3>(Amount)) {
1254 SDValue A = getConst32(Amount, dl);
1255 Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, A});
1256 } else if (isUInt<3>(VecLen - Amount)) {
1257 SDValue A = getConst32(VecLen - Amount, dl);
1258 Results.push(Hexagon::V6_vlalignbi, Ty, {Vb, Va, A});
1259 } else {
1260 SDValue A = getConst32(Amount, dl);
1261 Results.push(Hexagon::A2_tfrsi, Ty, {A});
1262 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)});
1263 }
1264 return OpRef::res(Results.top());
1265}
1266
1267// Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1268// pack these halves into a single vector, and remap SM into NewMask to use
1269// the new vector instead.
1270OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1271 ResultStack &Results, MutableArrayRef<int> NewMask,
1272 unsigned Options) {
1273 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1274 if (!Va.isValid() || !Vb.isValid())
1275 return OpRef::fail();
1276
1277 if (Vb.isUndef()) {
1278 std::copy(SM.Mask.begin(), SM.Mask.end(), NewMask.begin());
1279 return Va;
1280 }
1281 if (Va.isUndef()) {
1282 std::copy(SM.Mask.begin(), SM.Mask.end(), NewMask.begin());
1284 return Vb;
1285 }
1286
1287 MVT Ty = getSingleVT(MVT::i8);
1288 MVT PairTy = getPairVT(MVT::i8);
1289 OpRef Inp[2] = {Va, Vb};
1290 unsigned VecLen = SM.Mask.size();
1291
1292 auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1293 ResultStack &Results) {
1294 if (Amt == 0)
1295 return Lo;
1296 const SDLoc &dl(Results.InpNode);
1297 if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1298 bool IsRight = isUInt<3>(Amt); // Right align.
1299 SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1300 unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1301 Results.push(Opc, Ty, {Hi, Lo, S});
1302 return OpRef::res(Results.top());
1303 }
1304 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1305 OpRef A = OpRef::res(Results.top());
1306 Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1307 return OpRef::res(Results.top());
1308 };
1309
1310 // Segment is a vector half.
1311 unsigned SegLen = HwLen / 2;
1312
1313 // Check if we can shuffle vector halves around to get the used elements
1314 // into a single vector.
1315 shuffles::MaskT MaskH(SM.Mask);
1316 SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1317 unsigned SegCount = SegList.size();
1318 SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1319
1320 if (SegList.empty())
1321 return OpRef::undef(Ty);
1322
1323 // NOTE:
1324 // In the following part of the function, where the segments are rearranged,
1325 // the shuffle mask SM can be of any length that is a multiple of a vector
1326 // (i.e. a multiple of 2*SegLen), and non-zero.
1327 // The output segment map is computed, and it may have any even number of
1328 // entries, but the rearrangement of input segments will be done based only
1329 // on the first two (non-undef) entries in the segment map.
1330 // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1331 // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1332 // a single vector V = 3:1. The output mask will then be updated to use
1333 // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1334 //
1335 // Picking the segments based on the output map is an optimization. For
1336 // correctness it is only necessary that Seg0 and Seg1 are the two input
1337 // segments that are used in the output.
1338
1339 unsigned Seg0 = ~0u, Seg1 = ~0u;
1340 for (unsigned X : SegMap) {
1341 if (X == ~0u)
1342 continue;
1343 if (Seg0 == ~0u)
1344 Seg0 = X;
1345 else if (Seg1 != ~0u)
1346 break;
1347 if (X == ~1u || X != Seg0)
1348 Seg1 = X;
1349 }
1350
1351 if (SegCount == 1) {
1352 unsigned SrcOp = SegList[0] / 2;
1353 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1354 int M = SM.Mask[I];
1355 if (M >= 0) {
1356 M -= SrcOp * HwLen;
1357 assert(M >= 0);
1358 }
1359 NewMask[I] = M;
1360 }
1361 return Inp[SrcOp];
1362 }
1363
1364 if (SegCount == 2) {
1365 // Seg0 should not be undef here: this would imply a SegList
1366 // with <= 1 elements, which was checked earlier.
1367 assert(Seg0 != ~0u);
1368
1369 // If Seg0 or Seg1 are "multi-defined", pick them from the input
1370 // segment list instead.
1371 if (Seg0 == ~1u || Seg1 == ~1u) {
1372 if (Seg0 == Seg1) {
1373 Seg0 = SegList[0];
1374 Seg1 = SegList[1];
1375 } else if (Seg0 == ~1u) {
1376 Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1377 } else {
1378 assert(Seg1 == ~1u);
1379 Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1380 }
1381 }
1382 assert(Seg0 != ~1u && Seg1 != ~1u);
1383
1384 assert(Seg0 != Seg1 && "Expecting different segments");
1385 const SDLoc &dl(Results.InpNode);
1386 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1387 OpRef HL = OpRef::res(Results.top());
1388
1389 // Va = AB, Vb = CD
1390
1391 if (Seg0 / 2 == Seg1 / 2) {
1392 // Same input vector.
1393 Va = Inp[Seg0 / 2];
1394 if (Seg0 > Seg1) {
1395 // Swap halves.
1396 Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1397 Va = OpRef::res(Results.top());
1398 }
1399 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1400 } else if (Seg0 % 2 == Seg1 % 2) {
1401 // Picking AC, BD, CA, or DB.
1402 // vshuff(CD,AB,HL) -> BD:AC
1403 // vshuff(AB,CD,HL) -> DB:CA
1404 auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
1405 : std::make_pair(Va, Vb); // CA or DB
1406 Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1407 OpRef P = OpRef::res(Results.top());
1408 Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1409 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1410 } else {
1411 // Picking AD, BC, CB, or DA.
1412 if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1413 // AD or BC: this can be done using vmux.
1414 // Q = V6_pred_scalar2 SegLen
1415 // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1416 Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1417 OpRef Qt = OpRef::res(Results.top());
1418 auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
1419 : std::make_pair(Vb, Va); // CB
1420 Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1421 Va = OpRef::res(Results.top());
1422 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1423 } else {
1424 // BC or DA: this could be done via valign by SegLen.
1425 // Do nothing here, because valign (if possible) will be generated
1426 // later on (make sure the Seg0 values are as expected).
1427 assert(Seg0 == 1 || Seg0 == 3);
1428 }
1429 }
1430 }
1431
1432 // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1433 // FIXME: maybe remove this?
1434 ShuffleMask SMH(MaskH);
1435 assert(SMH.Mask.size() == VecLen);
1436 shuffles::MaskT MaskA(SMH.Mask);
1437
1438 if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1439 // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1440 shuffles::MaskT Swapped(SMH.Mask);
1442 ShuffleMask SW(Swapped);
1443 if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1444 MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1445 std::swap(Va, Vb);
1446 }
1447 }
1448 ShuffleMask SMA(MaskA);
1449 assert(SMA.Mask.size() == VecLen);
1450
1451 if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1452 int ShiftR = SMA.MinSrc;
1453 if (ShiftR >= static_cast<int>(HwLen)) {
1454 Va = Vb;
1455 Vb = OpRef::undef(Ty);
1456 ShiftR -= HwLen;
1457 }
1458 OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1459
1460 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1461 int M = SMA.Mask[I];
1462 if (M != -1)
1463 M -= SMA.MinSrc;
1464 NewMask[I] = M;
1465 }
1466 return RetVal;
1467 }
1468
1469 // By here, packing by segment (half-vector) shuffling, and vector alignment
1470 // failed. Try vmux.
1471 // Note: since this is using the original mask, Va and Vb must not have been
1472 // modified.
1473
1474 if (Options & PackMux) {
1475 // If elements picked from Va and Vb have all different (source) indexes
1476 // (relative to the start of the argument), do a mux, and update the mask.
1477 BitVector Picked(HwLen);
1479 bool CanMux = true;
1480 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1481 int M = SM.Mask[I];
1482 if (M == -1)
1483 continue;
1484 if (M >= static_cast<int>(HwLen))
1485 M -= HwLen;
1486 else
1487 MuxBytes[M] = 0xFF;
1488 if (Picked[M]) {
1489 CanMux = false;
1490 break;
1491 }
1492 NewMask[I] = M;
1493 }
1494 if (CanMux)
1495 return vmuxs(MuxBytes, Va, Vb, Results);
1496 }
1497 return OpRef::fail();
1498}
1499
1500// Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1501// pack these vectors into a pair, and remap SM into NewMask to use the
1502// new pair instead.
1503OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1504 ResultStack &Results, MutableArrayRef<int> NewMask) {
1505 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1507 if (SegList.empty())
1508 return OpRef::undef(getPairVT(MVT::i8));
1509
1510 // If more than two halves are used, bail.
1511 // TODO: be more aggressive here?
1512 unsigned SegCount = SegList.size();
1513 if (SegCount > 2)
1514 return OpRef::fail();
1515
1516 MVT HalfTy = getSingleVT(MVT::i8);
1517
1518 OpRef Inp[2] = { Va, Vb };
1519 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1520
1521 // Really make sure we have at most 2 vectors used in the mask.
1522 assert(SegCount <= 2);
1523
1524 for (int I = 0, E = SegList.size(); I != E; ++I) {
1525 unsigned S = SegList[I];
1526 OpRef Op = Inp[S / 2];
1527 Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1528 }
1529
1530 // NOTE: Using SegList as the packing map here (not SegMap). This works,
1531 // because we're not concerned here about the order of the segments (i.e.
1532 // single vectors) in the output pair. Changing the order of vectors is
1533 // free (as opposed to changing the order of vector halves as in packs),
1534 // and so there is no extra cost added in case the order needs to be
1535 // changed later.
1536 packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1537 return concats(Out[0], Out[1], Results);
1538}
1539
1540OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1541 ResultStack &Results) {
1542 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1543 MVT ByteTy = getSingleVT(MVT::i8);
1544 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1545 const SDLoc &dl(Results.InpNode);
1546 SDValue B = getVectorConstant(Bytes, dl);
1547 Results.push(Hexagon::V6_vd0, ByteTy, {});
1548 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1549 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1550 return OpRef::res(Results.top());
1551}
1552
1553OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1554 ResultStack &Results) {
1555 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1556 size_t S = Bytes.size() / 2;
1557 OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1558 OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1559 return concats(L, H, Results);
1560}
1561
1562OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1563 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1564 unsigned VecLen = SM.Mask.size();
1565 assert(HwLen == VecLen);
1566 (void)VecLen;
1567 assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1568
1569 if (isIdentity(SM.Mask))
1570 return Va;
1571 if (isUndef(SM.Mask))
1572 return OpRef::undef(getSingleVT(MVT::i8));
1573
1574 // First, check for rotations.
1575 if (auto Dist = rotationDistance(SM, VecLen)) {
1576 OpRef Rotate = funnels(Va, Va, *Dist, Results);
1577 if (Rotate.isValid())
1578 return Rotate;
1579 }
1580 unsigned HalfLen = HwLen / 2;
1581 assert(isPowerOf2_32(HalfLen));
1582
1583 // Handle special case where the output is the same half of the input
1584 // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1585 std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1586 if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1587 std::pair<int, unsigned> Strip2 =
1588 findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1589 if (Strip1 == Strip2) {
1590 const SDLoc &dl(Results.InpNode);
1591 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1592 Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1593 {Va, Va, OpRef::res(Results.top())});
1594 OpRef S = OpRef::res(Results.top());
1595 return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1596 }
1597 }
1598
1599 OpRef P = perfect(SM, Va, Results);
1600 if (P.isValid())
1601 return P;
1602 return butterfly(SM, Va, Results);
1603}
1604
1605OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1606 ResultStack &Results) {
1607 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1608 if (isUndef(SM.Mask))
1609 return OpRef::undef(getSingleVT(MVT::i8));
1610
1611 OpRef C = contracting(SM, Va, Vb, Results);
1612 if (C.isValid())
1613 return C;
1614
1615 int VecLen = SM.Mask.size();
1616 shuffles::MaskT PackedMask(VecLen);
1617 OpRef P = packs(SM, Va, Vb, Results, PackedMask);
1618 if (P.isValid())
1619 return shuffs1(ShuffleMask(PackedMask), P, Results);
1620
1621 // TODO: Before we split the mask, try perfect shuffle on concatenated
1622 // operands.
1623
1624 shuffles::MaskT MaskL(VecLen), MaskR(VecLen);
1625 splitMask(SM.Mask, MaskL, MaskR);
1626
1627 OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1628 OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1629 if (!L.isValid() || !R.isValid())
1630 return OpRef::fail();
1631
1632 SmallVector<uint8_t, 128> Bytes(VecLen);
1633 for (int I = 0; I != VecLen; ++I) {
1634 if (MaskL[I] != -1)
1635 Bytes[I] = 0xFF;
1636 }
1637 return vmuxs(Bytes, L, R, Results);
1638}
1639
1640OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1641 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1642 int VecLen = SM.Mask.size();
1643
1644 if (isIdentity(SM.Mask))
1645 return Va;
1646 if (isUndef(SM.Mask))
1647 return OpRef::undef(getPairVT(MVT::i8));
1648
1649 shuffles::MaskT PackedMask(VecLen);
1650 OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1651 if (P.isValid()) {
1652 ShuffleMask PM(PackedMask);
1653 OpRef E = expanding(PM, P, Results);
1654 if (E.isValid())
1655 return E;
1656
1657 OpRef L = shuffs1(PM.lo(), P, Results);
1658 OpRef H = shuffs1(PM.hi(), P, Results);
1659 if (L.isValid() && H.isValid())
1660 return concats(L, H, Results);
1661 }
1662
1663 if (!isLowHalfOnly(SM.Mask)) {
1664 // Doing a perfect shuffle on a low-half mask (i.e. where the upper half
1665 // is all-undef) may produce a perfect shuffle that generates legitimate
1666 // upper half. This isn't wrong, but if the perfect shuffle was possible,
1667 // then there is a good chance that a shorter (contracting) code may be
1668 // used as well (e.g. V6_vshuffeb, etc).
1669 OpRef R = perfect(SM, Va, Results);
1670 if (R.isValid())
1671 return R;
1672 // TODO commute the mask and try the opposite order of the halves.
1673 }
1674
1675 OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1676 OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1677 if (L.isValid() && H.isValid())
1678 return concats(L, H, Results);
1679
1680 return OpRef::fail();
1681}
1682
1683OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1684 ResultStack &Results) {
1685 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1686 if (isUndef(SM.Mask))
1687 return OpRef::undef(getPairVT(MVT::i8));
1688
1689 int VecLen = SM.Mask.size();
1690 SmallVector<int,256> PackedMask(VecLen);
1691 OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1692 if (P.isValid())
1693 return shuffp1(ShuffleMask(PackedMask), P, Results);
1694
1695 SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1696 splitMask(SM.Mask, MaskL, MaskR);
1697
1698 OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1699 OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1700 if (!L.isValid() || !R.isValid())
1701 return OpRef::fail();
1702
1703 // Mux the results.
1704 SmallVector<uint8_t,256> Bytes(VecLen);
1705 for (int I = 0; I != VecLen; ++I) {
1706 if (MaskL[I] != -1)
1707 Bytes[I] = 0xFF;
1708 }
1709 return vmuxp(Bytes, L, R, Results);
1710}
1711
1712namespace {
1713 struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1714 template <typename T>
1715 Deleter(SelectionDAG &D, T &C)
1716 : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1717 C.erase(N);
1718 }) {}
1719 };
1720
1721 template <typename T>
1722 struct NullifyingVector : public T {
1724 NullifyingVector(T &&V) : T(V) {
1725 for (unsigned i = 0, e = T::size(); i != e; ++i) {
1726 SDNode *&N = T::operator[](i);
1727 Refs[N] = &N;
1728 }
1729 }
1730 void erase(SDNode *N) {
1731 auto F = Refs.find(N);
1732 if (F != Refs.end())
1733 *F->second = nullptr;
1734 }
1735 };
1736}
1737
1738void HvxSelector::select(SDNode *ISelN) {
1739 // What's important here is to select the right set of nodes. The main
1740 // selection algorithm loops over nodes in a topological order, i.e. users
1741 // are visited before their operands.
1742 //
1743 // It is an error to have an unselected node with a selected operand, and
1744 // there is an assertion in the main selector code to enforce that.
1745 //
1746 // Such a situation could occur if we selected a node, which is both a
1747 // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1748 // node in the DAG.
1749 assert(ISelN->getOpcode() == HexagonISD::ISEL);
1750 SDNode *N0 = ISelN->getOperand(0).getNode();
1751
1752 // There could have been nodes created (i.e. inserted into the DAG)
1753 // that are now dead. Remove them, in case they use any of the nodes
1754 // to select (and make them look shared).
1756
1757 SetVector<SDNode *> SubNodes;
1758
1759 if (!N0->isMachineOpcode()) {
1760 // Don't want to select N0 if it's shared with another node, except if
1761 // it's shared with other ISELs.
1762 auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1763 if (llvm::all_of(N0->uses(), IsISelN))
1764 SubNodes.insert(N0);
1765 }
1766 if (SubNodes.empty()) {
1767 ISel.ReplaceNode(ISelN, N0);
1768 return;
1769 }
1770
1771 // Need to manually select the nodes that are dominated by the ISEL. Other
1772 // nodes are reachable from the rest of the DAG, and so will be selected
1773 // by the DAG selection routine.
1774 SetVector<SDNode*> Dom, NonDom;
1775 Dom.insert(N0);
1776
1777 auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool {
1778 if (Dom.count(T))
1779 return true;
1780 if (T->use_empty() || NonDom.count(T))
1781 return false;
1782 for (SDNode *U : T->uses()) {
1783 // If T is reachable from a known non-dominated node, then T itself
1784 // is non-dominated.
1785 if (!Rec(U, Rec)) {
1786 NonDom.insert(T);
1787 return false;
1788 }
1789 }
1790 Dom.insert(T);
1791 return true;
1792 };
1793
1794 auto IsDom = [&IsDomRec] (SDNode *T) { return IsDomRec(T, IsDomRec); };
1795
1796 // Add the rest of nodes dominated by ISEL to SubNodes.
1797 for (unsigned I = 0; I != SubNodes.size(); ++I) {
1798 for (SDValue Op : SubNodes[I]->ops()) {
1799 SDNode *O = Op.getNode();
1800 if (IsDom(O))
1801 SubNodes.insert(O);
1802 }
1803 }
1804
1805 // Do a topological sort of nodes from Dom.
1806 SetVector<SDNode*> TmpQ;
1807
1808 std::map<SDNode *, unsigned> OpCount;
1809 for (SDNode *T : Dom) {
1810 unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) {
1811 return Dom.count(U.getNode());
1812 });
1813
1814 OpCount.insert({T, NumDomOps});
1815 if (NumDomOps == 0)
1816 TmpQ.insert(T);
1817 }
1818
1819 for (unsigned I = 0; I != TmpQ.size(); ++I) {
1820 SDNode *S = TmpQ[I];
1821 for (SDNode *U : S->uses()) {
1822 if (U == ISelN)
1823 continue;
1824 auto F = OpCount.find(U);
1825 assert(F != OpCount.end());
1826 if (F->second > 0 && !--F->second)
1827 TmpQ.insert(F->first);
1828 }
1829 }
1830
1831 // Remove the marker.
1832 ISel.ReplaceNode(ISelN, N0);
1833
1834 assert(SubNodes.size() == TmpQ.size());
1835 NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1836
1837 Deleter DUQ(DAG, Queue);
1838 for (SDNode *S : reverse(Queue)) {
1839 if (S == nullptr)
1840 continue;
1841 DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1842 ISel.Select(S);
1843 }
1844}
1845
1846bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1847 MVT ResTy, SDValue Va, SDValue Vb,
1848 SDNode *N) {
1849 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1850 MVT ElemTy = ResTy.getVectorElementType();
1851 assert(ElemTy == MVT::i8);
1852 unsigned VecLen = Mask.size();
1853 bool HavePairs = (2*HwLen == VecLen);
1854 MVT SingleTy = getSingleVT(MVT::i8);
1855
1856 // The prior attempts to handle this shuffle may have left a bunch of
1857 // dead nodes in the DAG (such as constants). These nodes will be added
1858 // at the end of DAG's node list, which at that point had already been
1859 // sorted topologically. In the main selection loop, the node list is
1860 // traversed backwards from the root node, which means that any new
1861 // nodes (from the end of the list) will not be visited.
1862 // Scalarization will replace the shuffle node with the scalarized
1863 // expression, and if that expression reused any if the leftoever (dead)
1864 // nodes, these nodes would not be selected (since the "local" selection
1865 // only visits nodes that are not in AllNodes).
1866 // To avoid this issue, remove all dead nodes from the DAG now.
1867// DAG.RemoveDeadNodes();
1868
1870 LLVMContext &Ctx = *DAG.getContext();
1871 MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1872 for (int I : Mask) {
1873 if (I < 0) {
1874 Ops.push_back(ISel.selectUndef(dl, LegalTy));
1875 continue;
1876 }
1877 SDValue Vec;
1878 unsigned M = I;
1879 if (M < VecLen) {
1880 Vec = Va;
1881 } else {
1882 Vec = Vb;
1883 M -= VecLen;
1884 }
1885 if (HavePairs) {
1886 if (M < HwLen) {
1887 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1888 } else {
1889 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1890 M -= HwLen;
1891 }
1892 }
1893 SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1894 SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1896 assert(L.getNode());
1897 Ops.push_back(L);
1898 }
1899
1900 SDValue LV;
1901 if (2*HwLen == VecLen) {
1902 SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1903 SDValue L0 = Lower.LowerOperation(B0, DAG);
1904 SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1905 SDValue L1 = Lower.LowerOperation(B1, DAG);
1906 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1907 // functions may expect to be called only for illegal operations, so
1908 // make sure that they are not called for legal ones. Develop a better
1909 // mechanism for dealing with this.
1910 LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1911 } else {
1912 SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1913 LV = Lower.LowerOperation(BV, DAG);
1914 }
1915
1916 assert(!N->use_empty());
1917 SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1918 ISel.ReplaceNode(N, IS.getNode());
1919 select(IS.getNode());
1921 return true;
1922}
1923
1925 unsigned Width) {
1926 auto possibilities = [](ArrayRef<uint8_t> Bs, unsigned Width) -> uint32_t {
1927 unsigned Impossible = ~(1u << Width) + 1;
1928 for (unsigned I = 0, E = Bs.size(); I != E; ++I) {
1929 uint8_t B = Bs[I];
1930 if (B == 0xff)
1931 continue;
1932 if (~Impossible == 0)
1933 break;
1934 for (unsigned Log = 0; Log != Width; ++Log) {
1935 if (Impossible & (1u << Log))
1936 continue;
1937 unsigned Expected = (I >> Log) % 2;
1938 if (B != Expected)
1939 Impossible |= (1u << Log);
1940 }
1941 }
1942 return ~Impossible;
1943 };
1944
1945 SmallVector<uint32_t, 8> Worklist(Width);
1946
1947 for (unsigned BitIdx = 0; BitIdx != Width; ++BitIdx) {
1948 SmallVector<uint8_t> BitValues(SM.Mask.size());
1949 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
1950 int M = SM.Mask[i];
1951 if (M < 0)
1952 BitValues[i] = 0xff;
1953 else
1954 BitValues[i] = (M & (1u << BitIdx)) != 0;
1955 }
1956 Worklist[BitIdx] = possibilities(BitValues, Width);
1957 }
1958
1959 // If there is a word P in Worklist that matches multiple possibilities,
1960 // then if any other word Q matches any of the possibilities matched by P,
1961 // then Q matches all the possibilities matched by P. In fact, P == Q.
1962 // In other words, for each words P, Q, the sets of possibilities matched
1963 // by P and Q are either equal or disjoint (no partial overlap).
1964 //
1965 // Illustration: For 4-bit values there are 4 complete sequences:
1966 // a: 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
1967 // b: 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
1968 // c: 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1
1969 // d: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
1970 //
1971 // Words containing unknown bits that match two of the complete
1972 // sequences:
1973 // ab: 0 u u 1 0 u u 1 0 u u 1 0 u u 1
1974 // ac: 0 u 0 u u 1 u 1 0 u 0 u u 1 u 1
1975 // ad: 0 u 0 u 0 u 0 u u 1 u 1 u 1 u 1
1976 // bc: 0 0 u u u u 1 1 0 0 u u u u 1 1
1977 // bd: 0 0 u u 0 0 u u u u 1 1 u u 1 1
1978 // cd: 0 0 0 0 u u u u u u u u 1 1 1 1
1979 //
1980 // Proof of the claim above:
1981 // Let P be a word that matches s0 and s1. For that to happen, all known
1982 // bits in P must match s0 and s1 exactly.
1983 // Assume there is Q that matches s1. Note that since P and Q came from
1984 // the same shuffle mask, the positions of unknown bits in P and Q match
1985 // exactly, which makes the indices of known bits be exactly the same
1986 // between P and Q. Since P matches s0 and s1, the known bits of P much
1987 // match both s0 and s1. Also, since Q matches s1, the known bits in Q
1988 // are exactly the same as in s1, which means that they are exactly the
1989 // same as in P. This implies that P == Q.
1990
1991 // There can be a situation where there are more entries with the same
1992 // bits set than there are set bits (e.g. value 9 occuring more than 2
1993 // times). In such cases it will be impossible to complete this to a
1994 // perfect shuffle.
1995 SmallVector<uint32_t, 8> Sorted(Worklist);
1996 llvm::sort(Sorted.begin(), Sorted.end());
1997
1998 for (unsigned I = 0, E = Sorted.size(); I != E;) {
1999 unsigned P = Sorted[I], Count = 1;
2000 while (++I != E && P == Sorted[I])
2001 ++Count;
2002 if ((unsigned)llvm::popcount(P) < Count) {
2003 // Reset all occurences of P, if there are more occurrences of P
2004 // than there are bits in P.
2005 for (unsigned &Q : Worklist) {
2006 if (Q == P)
2007 Q = 0;
2008 }
2009 }
2010 }
2011
2012 return Worklist;
2013}
2014
2017 // Pick a completion if there are multiple possibilities. For now just
2018 // select any valid completion.
2019 SmallVector<uint32_t, 8> Comps(Completions);
2020
2021 for (unsigned I = 0; I != Width; ++I) {
2022 uint32_t P = Comps[I];
2023 assert(P != 0);
2024 if (isPowerOf2_32(P))
2025 continue;
2026 // T = least significant bit of P.
2027 uint32_t T = P ^ ((P - 1) & P);
2028 // Clear T in all remaining words matching P.
2029 for (unsigned J = I + 1; J != Width; ++J) {
2030 if (Comps[J] == P)
2031 Comps[J] ^= T;
2032 }
2033 Comps[I] = T;
2034 }
2035
2036#ifndef NDEBUG
2037 // Check that we have generated a valid completion.
2038 uint32_t OrAll = 0;
2039 for (uint32_t C : Comps) {
2041 OrAll |= C;
2042 }
2043 assert(OrAll == (1u << Width) -1);
2044#endif
2045
2046 return Comps;
2047}
2048
2049std::optional<int> HvxSelector::rotationDistance(ShuffleMask SM,
2050 unsigned WrapAt) {
2051 std::optional<int> Dist;
2052 for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
2053 int M = SM.Mask[I];
2054 if (M < 0)
2055 continue;
2056 if (Dist) {
2057 if ((I + *Dist) % static_cast<int>(WrapAt) != M)
2058 return std::nullopt;
2059 } else {
2060 // Integer a%b operator assumes rounding towards zero by /, so it
2061 // "misbehaves" when a crosses 0 (the remainder also changes sign).
2062 // Add WrapAt in an attempt to keep I+Dist non-negative.
2063 Dist = M - I;
2064 if (Dist < 0)
2065 Dist = *Dist + WrapAt;
2066 }
2067 }
2068 return Dist;
2069}
2070
2071OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
2072 ResultStack &Results) {
2073 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2074 if (!Va.isValid() || !Vb.isValid())
2075 return OpRef::fail();
2076
2077 // Contracting shuffles, i.e. instructions that always discard some bytes
2078 // from the operand vectors.
2079 //
2080 // Funnel shifts
2081 // V6_vshuff{e,o}b
2082 // V6_vshuf{e,o}h
2083 // V6_vdealb4w
2084 // V6_vpack{e,o}{b,h}
2085
2086 int VecLen = SM.Mask.size();
2087
2088 // First, check for funnel shifts.
2089 if (auto Dist = rotationDistance(SM, 2 * VecLen)) {
2090 OpRef Funnel = funnels(Va, Vb, *Dist, Results);
2091 if (Funnel.isValid())
2092 return Funnel;
2093 }
2094
2095 MVT SingleTy = getSingleVT(MVT::i8);
2096 MVT PairTy = getPairVT(MVT::i8);
2097
2098 auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) -> bool {
2099 return Mask1 == Mask2;
2100 };
2101
2102 using PackConfig = std::pair<unsigned, bool>;
2103 PackConfig Packs[] = {
2104 {1, false}, // byte, even
2105 {1, true}, // byte, odd
2106 {2, false}, // half, even
2107 {2, true}, // half, odd
2108 };
2109
2110 { // Check vpack
2111 unsigned Opcodes[] = {
2112 Hexagon::V6_vpackeb,
2113 Hexagon::V6_vpackob,
2114 Hexagon::V6_vpackeh,
2115 Hexagon::V6_vpackoh,
2116 };
2117 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2118 auto [Size, Odd] = Packs[i];
2119 if (same(SM.Mask, shuffles::mask(shuffles::vpack, HwLen, Size, Odd))) {
2120 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2121 return OpRef::res(Results.top());
2122 }
2123 }
2124 }
2125
2126 { // Check vshuff
2127 unsigned Opcodes[] = {
2128 Hexagon::V6_vshuffeb,
2129 Hexagon::V6_vshuffob,
2130 Hexagon::V6_vshufeh,
2131 Hexagon::V6_vshufoh,
2132 };
2133 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2134 auto [Size, Odd] = Packs[i];
2135 if (same(SM.Mask, shuffles::mask(shuffles::vshuff, HwLen, Size, Odd))) {
2136 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2137 return OpRef::res(Results.top());
2138 }
2139 }
2140 }
2141
2142 { // Check vdeal
2143 // There is no "V6_vdealeb", etc, but the supposed behavior of vdealeb
2144 // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such
2145 // variants of "deal" can be done similarly.
2146 unsigned Opcodes[] = {
2147 Hexagon::V6_vpackeb,
2148 Hexagon::V6_vpackob,
2149 Hexagon::V6_vpackeh,
2150 Hexagon::V6_vpackoh,
2151 };
2152 const SDLoc &dl(Results.InpNode);
2153
2154 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2155 auto [Size, Odd] = Packs[i];
2156 if (same(SM.Mask, shuffles::mask(shuffles::vdeal, HwLen, Size, Odd))) {
2157 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-2 * Size, dl)});
2158 Results.push(Hexagon::V6_vdealvdd, PairTy, {Vb, Va, OpRef::res(-1)});
2159 auto vdeal = OpRef::res(Results.top());
2160 Results.push(Opcodes[i], SingleTy,
2161 {OpRef::hi(vdeal), OpRef::lo(vdeal)});
2162 return OpRef::res(Results.top());
2163 }
2164 }
2165 }
2166
2167 if (same(SM.Mask, shuffles::mask(shuffles::vdealb4w, HwLen))) {
2168 Results.push(Hexagon::V6_vdealb4w, SingleTy, {Vb, Va});
2169 return OpRef::res(Results.top());
2170 }
2171
2172 return OpRef::fail();
2173}
2174
2175OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2176 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2177 // Expanding shuffles (using all elements and inserting into larger vector):
2178 //
2179 // V6_vunpacku{b,h} [*]
2180 //
2181 // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
2182 //
2183 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
2184 // they are not shuffles.
2185 //
2186 // The argument is a single vector.
2187
2188 int VecLen = SM.Mask.size();
2189 assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
2190
2191 std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
2192
2193 // The patterns for the unpacks, in terms of the starting offsets of the
2194 // consecutive strips (L = length of the strip, N = VecLen):
2195 //
2196 // vunpacku: 0, -1, L, -1, 2L, -1 ...
2197
2198 if (Strip.first != 0)
2199 return OpRef::fail();
2200
2201 // The vunpackus only handle byte and half-word.
2202 if (Strip.second != 1 && Strip.second != 2)
2203 return OpRef::fail();
2204
2205 int N = VecLen;
2206 int L = Strip.second;
2207
2208 // First, check the non-ignored strips.
2209 for (int I = 2*L; I < N; I += 2*L) {
2210 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
2211 if (S.second != unsigned(L))
2212 return OpRef::fail();
2213 if (2*S.first != I)
2214 return OpRef::fail();
2215 }
2216 // Check the -1s.
2217 for (int I = L; I < N; I += 2*L) {
2218 auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
2219 if (S.first != -1 || S.second != unsigned(L))
2220 return OpRef::fail();
2221 }
2222
2223 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
2224 : Hexagon::V6_vunpackuh;
2225 Results.push(Opc, getPairVT(MVT::i8), {Va});
2226 return OpRef::res(Results.top());
2227}
2228
2229OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2230 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2231 // V6_vdeal{b,h}
2232 // V6_vshuff{b,h}
2233
2234 // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
2235 // V6_vshuffvdd (V6_vshuff)
2236 // V6_dealvdd (V6_vdeal)
2237
2238 int VecLen = SM.Mask.size();
2239 assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
2240 unsigned LogLen = Log2_32(VecLen);
2241 unsigned HwLog = Log2_32(HwLen);
2242 // The result length must be the same as the length of a single vector,
2243 // or a vector pair.
2244 assert(LogLen == HwLog || LogLen == HwLog + 1);
2245 bool HavePairs = LogLen == HwLog + 1;
2246
2247 SmallVector<unsigned, 8> Perm(LogLen);
2248
2249 // Check if this could be a perfect shuffle, or a combination of perfect
2250 // shuffles.
2251 //
2252 // Consider this permutation (using hex digits to make the ASCII diagrams
2253 // easier to read):
2254 // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
2255 // This is a "deal" operation: divide the input into two halves, and
2256 // create the output by picking elements by alternating between these two
2257 // halves:
2258 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
2259 // 8 9 A B C D E F
2260 //
2261 // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
2262 // a somwehat different mechanism that could be used to perform shuffle/
2263 // deal operations: a 2x2 transpose.
2264 // Consider the halves of inputs again, they can be interpreted as a 2x8
2265 // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
2266 // together. Now, when considering 2 elements at a time, it will be a 2x4
2267 // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
2268 // 01 23 45 67
2269 // 89 AB CD EF
2270 // With groups of 4, this will become a single 2x2 matrix, and so on.
2271 //
2272 // The 2x2 transpose instruction works by transposing each of the 2x2
2273 // matrices (or "sub-matrices"), given a specific group size. For example,
2274 // if the group size is 1 (i.e. each element is its own group), there
2275 // will be four transposes of the four 2x2 matrices that form the 2x8.
2276 // For example, with the inputs as above, the result will be:
2277 // 0 8 2 A 4 C 6 E
2278 // 1 9 3 B 5 D 7 F
2279 // Now, this result can be tranposed again, but with the group size of 2:
2280 // 08 19 4C 5D
2281 // 2A 3B 6E 7F
2282 // If we then transpose that result, but with the group size of 4, we get:
2283 // 0819 2A3B
2284 // 4C5D 6E7F
2285 // If we concatenate these two rows, it will be
2286 // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
2287 // which is the same as the "deal" [*] above.
2288 //
2289 // In general, a "deal" of individual elements is a series of 2x2 transposes,
2290 // with changing group size. HVX has two instructions:
2291 // Vdd = V6_vdealvdd Vu, Vv, Rt
2292 // Vdd = V6_shufvdd Vu, Vv, Rt
2293 // that perform exactly that. The register Rt controls which transposes are
2294 // going to happen: a bit at position n (counting from 0) indicates that a
2295 // transpose with a group size of 2^n will take place. If multiple bits are
2296 // set, multiple transposes will happen: vdealvdd will perform them starting
2297 // with the largest group size, vshuffvdd will do them in the reverse order.
2298 //
2299 // The main observation is that each 2x2 transpose corresponds to swapping
2300 // columns of bits in the binary representation of the values.
2301 //
2302 // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
2303 // in a given column. The * denote the columns that will be swapped.
2304 // The transpose with the group size 2^n corresponds to swapping columns
2305 // 3 (the highest log) and log2(n):
2306 //
2307 // 3 2 1 0 0 2 1 3 0 2 3 1
2308 // * * * * * *
2309 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2310 // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
2311 // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
2312 // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
2313 // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
2314 // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
2315 // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
2316 // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
2317 // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
2318 // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
2319 // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
2320 // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
2321 // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
2322 // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
2323 // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
2324 // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
2325
2326 // There is one special case that is not a perfect shuffle, but can be
2327 // turned into one easily: when the shuffle operates on a vector pair,
2328 // but the two vectors in the pair are swapped. The code that identifies
2329 // perfect shuffles will reject it, unless the order is reversed.
2330 shuffles::MaskT MaskStorage(SM.Mask);
2331 bool InvertedPair = false;
2332 if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2333 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2334 int M = SM.Mask[i];
2335 MaskStorage[i] = M >= int(HwLen) ? M - HwLen : M + HwLen;
2336 }
2337 InvertedPair = true;
2338 SM = ShuffleMask(MaskStorage);
2339 }
2340
2341 auto Comps = getPerfectCompletions(SM, LogLen);
2342 if (llvm::is_contained(Comps, 0))
2343 return OpRef::fail();
2344
2345 auto Pick = completeToPerfect(Comps, LogLen);
2346 for (unsigned I = 0; I != LogLen; ++I)
2347 Perm[I] = Log2_32(Pick[I]);
2348
2349 // Once we have Perm, represent it as cycles. Denote the maximum log2
2350 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2351 // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2352 // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2353 // order being from left to right. Any (contiguous) segment where the
2354 // values ai, ai+1...aj are either all increasing or all decreasing,
2355 // can be implemented via a single vshuffvdd/vdealvdd respectively.
2356 //
2357 // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2358 // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2359 // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2360 // can be used to generate a sequence of vshuffvdd/vdealvdd.
2361 //
2362 // Example:
2363 // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2364 // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2365 // (4 0 1)(4 0)(4 2 3)(4 2).
2366 // It can then be expanded into swaps as
2367 // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2368 // and broken up into "increasing" segments as
2369 // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2370 // This is equivalent to
2371 // (4 0 1)(4 0 2 3)(4 2),
2372 // which can be implemented as 3 vshufvdd instructions.
2373
2374 using CycleType = SmallVector<unsigned, 8>;
2375 std::set<CycleType> Cycles;
2376 std::set<unsigned> All;
2377
2378 for (unsigned I : Perm)
2379 All.insert(I);
2380
2381 // If the cycle contains LogLen-1, move it to the front of the cycle.
2382 // Otherwise, return the cycle unchanged.
2383 auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2384 unsigned LogPos, N = C.size();
2385 for (LogPos = 0; LogPos != N; ++LogPos)
2386 if (C[LogPos] == LogLen - 1)
2387 break;
2388 if (LogPos == N)
2389 return C;
2390
2391 CycleType NewC(C.begin() + LogPos, C.end());
2392 NewC.append(C.begin(), C.begin() + LogPos);
2393 return NewC;
2394 };
2395
2396 auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
2397 // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2398 // for bytes zero is included, for halfwords is not.
2399 if (Cs.size() != 1)
2400 return 0u;
2401 const CycleType &C = *Cs.begin();
2402 if (C[0] != Len - 1)
2403 return 0u;
2404 int D = Len - C.size();
2405 if (D != 0 && D != 1)
2406 return 0u;
2407
2408 bool IsDeal = true, IsShuff = true;
2409 for (unsigned I = 1; I != Len - D; ++I) {
2410 if (C[I] != Len - 1 - I)
2411 IsDeal = false;
2412 if (C[I] != I - (1 - D)) // I-1, I
2413 IsShuff = false;
2414 }
2415 // At most one, IsDeal or IsShuff, can be non-zero.
2416 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2417 static unsigned Deals[] = {Hexagon::V6_vdealb, Hexagon::V6_vdealh};
2418 static unsigned Shufs[] = {Hexagon::V6_vshuffb, Hexagon::V6_vshuffh};
2419 return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
2420 };
2421
2422 while (!All.empty()) {
2423 unsigned A = *All.begin();
2424 All.erase(A);
2425 CycleType C;
2426 C.push_back(A);
2427 for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
2428 C.push_back(B);
2429 All.erase(B);
2430 }
2431 if (C.size() <= 1)
2432 continue;
2433 Cycles.insert(canonicalize(C));
2434 }
2435
2436 MVT SingleTy = getSingleVT(MVT::i8);
2437 MVT PairTy = getPairVT(MVT::i8);
2438
2439 // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2440 if (unsigned(VecLen) == HwLen) {
2441 if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
2442 Results.push(SingleOpc, SingleTy, {Va});
2443 return OpRef::res(Results.top());
2444 }
2445 }
2446
2447 // From the cycles, construct the sequence of values that will
2448 // then form the control values for vdealvdd/vshuffvdd, i.e.
2449 // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2450 // This essentially strips the M value from the cycles where
2451 // it's present, and performs the insertion of M (then stripping)
2452 // for cycles without M (as described in an earlier comment).
2453 SmallVector<unsigned, 8> SwapElems;
2454 // When the input is extended (i.e. single vector becomes a pair),
2455 // this is done by using an "undef" vector as the second input.
2456 // However, then we get
2457 // input 1: GOODBITS
2458 // input 2: ........
2459 // but we need
2460 // input 1: ....BITS
2461 // input 2: ....GOOD
2462 // Then at the end, this needs to be undone. To accomplish this,
2463 // artificially add "LogLen-1" at both ends of the sequence.
2464 if (!HavePairs)
2465 SwapElems.push_back(LogLen - 1);
2466 for (const CycleType &C : Cycles) {
2467 // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2468 unsigned First = (C[0] == LogLen - 1) ? 1 : 0;
2469 SwapElems.append(C.begin() + First, C.end());
2470 if (First == 0)
2471 SwapElems.push_back(C[0]);
2472 }
2473 if (!HavePairs)
2474 SwapElems.push_back(LogLen - 1);
2475
2476 const SDLoc &dl(Results.InpNode);
2477 OpRef Arg = HavePairs ? Va : concats(Va, OpRef::undef(SingleTy), Results);
2478 if (InvertedPair)
2479 Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
2480
2481 for (unsigned I = 0, E = SwapElems.size(); I != E;) {
2482 bool IsInc = I == E - 1 || SwapElems[I] < SwapElems[I + 1];
2483 unsigned S = (1u << SwapElems[I]);
2484 if (I < E - 1) {
2485 while (++I < E - 1 && IsInc == (SwapElems[I] < SwapElems[I + 1]))
2486 S |= 1u << SwapElems[I];
2487 // The above loop will not add a bit for the final SwapElems[I+1],
2488 // so add it here.
2489 S |= 1u << SwapElems[I];
2490 }
2491 ++I;
2492
2493 NodeTemplate Res;
2494 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2495 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2496 Res.Ty = PairTy;
2497 Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};
2498 Results.push(Res);
2499 Arg = OpRef::res(Results.top());
2500 }
2501
2502 return HavePairs ? Arg : OpRef::lo(Arg);
2503}
2504
2505OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2506 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2507 // Butterfly shuffles.
2508 //
2509 // V6_vdelta
2510 // V6_vrdelta
2511 // V6_vror
2512
2513 // The assumption here is that all elements picked by Mask are in the
2514 // first operand to the vector_shuffle. This assumption is enforced
2515 // by the caller.
2516
2517 MVT ResTy = getSingleVT(MVT::i8);
2518 PermNetwork::Controls FC, RC;
2519 const SDLoc &dl(Results.InpNode);
2520 int VecLen = SM.Mask.size();
2521
2522 for (int M : SM.Mask) {
2523 if (M != -1 && M >= VecLen)
2524 return OpRef::fail();
2525 }
2526
2527 // Try the deltas/benes for both single vectors and vector pairs.
2528 ForwardDeltaNetwork FN(SM.Mask);
2529 if (FN.run(FC)) {
2530 SDValue Ctl = getVectorConstant(FC, dl);
2531 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2532 return OpRef::res(Results.top());
2533 }
2534
2535 // Try reverse delta.
2536 ReverseDeltaNetwork RN(SM.Mask);
2537 if (RN.run(RC)) {
2538 SDValue Ctl = getVectorConstant(RC, dl);
2539 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2540 return OpRef::res(Results.top());
2541 }
2542
2543 // Do Benes.
2544 BenesNetwork BN(SM.Mask);
2545 if (BN.run(FC, RC)) {
2546 SDValue CtlF = getVectorConstant(FC, dl);
2547 SDValue CtlR = getVectorConstant(RC, dl);
2548 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2549 Results.push(Hexagon::V6_vrdelta, ResTy,
2550 {OpRef::res(-1), OpRef(CtlR)});
2551 return OpRef::res(Results.top());
2552 }
2553
2554 return OpRef::fail();
2555}
2556
2557SDValue HvxSelector::getConst32(int Val, const SDLoc &dl) {
2558 return DAG.getTargetConstant(Val, dl, MVT::i32);
2559}
2560
2561SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2562 const SDLoc &dl) {
2564 for (uint8_t C : Data)
2565 Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2566 MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2567 SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2568 SDValue LV = Lower.LowerOperation(BV, DAG);
2570 return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2571}
2572
2574 SDValue Inp = N->getOperand(0);
2575 MVT ResTy = N->getValueType(0).getSimpleVT();
2576 unsigned Idx = N->getConstantOperandVal(1);
2577
2578 [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
2579 [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
2581 assert(2 * ResLen == InpTy.getVectorNumElements());
2582 assert(Idx == 0 || Idx == ResLen);
2583
2584 unsigned SubReg = Idx == 0 ? Hexagon::vsub_lo : Hexagon::vsub_hi;
2585 SDValue Ext = DAG.getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
2586
2587 ISel.ReplaceNode(N, Ext.getNode());
2588}
2589
2591 DEBUG_WITH_TYPE("isel", {
2592 dbgs() << "Starting " << __func__ << " on node:\n";
2593 N->dump(&DAG);
2594 });
2595 MVT ResTy = N->getValueType(0).getSimpleVT();
2596 // Assume that vector shuffles operate on vectors of bytes.
2597 assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2598
2599 auto *SN = cast<ShuffleVectorSDNode>(N);
2600 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2601 // This shouldn't really be necessary. Is it?
2602 for (int &Idx : Mask)
2603 if (Idx != -1 && Idx < 0)
2604 Idx = -1;
2605
2606 unsigned VecLen = Mask.size();
2607 bool HavePairs = (2*HwLen == VecLen);
2608 assert(ResTy.getSizeInBits() / 8 == VecLen);
2609
2610 // Vd = vector_shuffle Va, Vb, Mask
2611 //
2612
2613 bool UseLeft = false, UseRight = false;
2614 for (unsigned I = 0; I != VecLen; ++I) {
2615 if (Mask[I] == -1)
2616 continue;
2617 unsigned Idx = Mask[I];
2618 assert(Idx < 2*VecLen);
2619 if (Idx < VecLen)
2620 UseLeft = true;
2621 else
2622 UseRight = true;
2623 }
2624
2625 DEBUG_WITH_TYPE("isel", {
2626 dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2627 << UseLeft << " UseRight=" << UseRight << " HavePairs="
2628 << HavePairs << '\n';
2629 });
2630 // If the mask is all -1's, generate "undef".
2631 if (!UseLeft && !UseRight) {
2632 ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2633 return;
2634 }
2635
2636 SDValue Vec0 = N->getOperand(0);
2637 SDValue Vec1 = N->getOperand(1);
2638 assert(Vec0.getValueType() == ResTy && Vec1.getValueType() == ResTy);
2639
2640 ResultStack Results(SN);
2641 OpRef Va = OpRef::undef(ResTy);
2642 OpRef Vb = OpRef::undef(ResTy);
2643
2644 if (!Vec0.isUndef()) {
2645 Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2646 Va = OpRef::OpRef::res(Results.top());
2647 }
2648 if (!Vec1.isUndef()) {
2649 Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2650 Vb = OpRef::res(Results.top());
2651 }
2652
2653 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2654 : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2655
2656 bool Done = Res.isValid();
2657 if (Done) {
2658 // Make sure that Res is on the stack before materializing.
2659 Results.push(TargetOpcode::COPY, ResTy, {Res});
2660 materialize(Results);
2661 } else {
2662 Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2663 }
2664
2665 if (!Done) {
2666#ifndef NDEBUG
2667 dbgs() << "Unhandled shuffle:\n";
2668 SN->dumpr(&DAG);
2669#endif
2670 llvm_unreachable("Failed to select vector shuffle");
2671 }
2672}
2673
2675 // If this is a rotation by less than 8, use V6_valignbi.
2676 MVT Ty = N->getValueType(0).getSimpleVT();
2677 const SDLoc &dl(N);
2678 SDValue VecV = N->getOperand(0);
2679 SDValue RotV = N->getOperand(1);
2680 SDNode *NewN = nullptr;
2681
2682 if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2683 unsigned S = CN->getZExtValue() % HST.getVectorLength();
2684 if (S == 0) {
2685 NewN = VecV.getNode();
2686 } else if (isUInt<3>(S)) {
2687 NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2688 {VecV, VecV, getConst32(S, dl)});
2689 }
2690 }
2691
2692 if (!NewN)
2693 NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2694
2695 ISel.ReplaceNode(N, NewN);
2696}
2697
2699 SDValue Vv = N->getOperand(0);
2700 SDValue Vu = N->getOperand(1);
2701 SDValue Rt = N->getOperand(2);
2702 SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2703 N->getValueType(0), {Vv, Vu, Rt});
2704 ISel.ReplaceNode(N, NewN);
2706}
2707
2708void HexagonDAGToDAGISel::PreprocessHvxISelDAG() {
2709 auto getNodes = [this]() -> std::vector<SDNode *> {
2710 std::vector<SDNode *> T;
2711 T.reserve(CurDAG->allnodes_size());
2712 for (SDNode &N : CurDAG->allnodes())
2713 T.push_back(&N);
2714 return T;
2715 };
2716
2717 ppHvxShuffleOfShuffle(getNodes());
2718}
2719
2720template <> struct std::hash<SDValue> {
2721 std::size_t operator()(SDValue V) const {
2722 return std::hash<const void *>()(V.getNode()) +
2723 std::hash<unsigned>()(V.getResNo());
2724 };
2725};
2726
2727void HexagonDAGToDAGISel::ppHvxShuffleOfShuffle(std::vector<SDNode *> &&Nodes) {
2728 // Motivating case:
2729 // t10: v64i32 = ...
2730 // t46: v128i8 = vector_shuffle<...> t44, t45
2731 // t48: v128i8 = vector_shuffle<...> t44, t45
2732 // t42: v128i8 = vector_shuffle<...> t46, t48
2733 // t12: v32i32 = extract_subvector t10, Constant:i32<0>
2734 // t44: v128i8 = bitcast t12
2735 // t15: v32i32 = extract_subvector t10, Constant:i32<32>
2736 // t45: v128i8 = bitcast t15
2737 SelectionDAG &DAG = *CurDAG;
2738 unsigned HwLen = HST->getVectorLength();
2739
2740 struct SubVectorInfo {
2741 SubVectorInfo(SDValue S, unsigned H) : Src(S), HalfIdx(H) {}
2742 SDValue Src;
2743 unsigned HalfIdx;
2744 };
2745
2746 using MapType = std::unordered_map<SDValue, unsigned>;
2747
2748 auto getMaskElt = [&](unsigned Idx, ShuffleVectorSDNode *Shuff0,
2749 ShuffleVectorSDNode *Shuff1,
2750 const MapType &OpMap) -> int {
2751 // Treat Shuff0 and Shuff1 as operands to another vector shuffle, and
2752 // Idx as a (non-undef) element of the top level shuffle's mask, that
2753 // is, index into concat(Shuff0, Shuff1).
2754 // Assuming that Shuff0 and Shuff1 both operate on subvectors of the
2755 // same source vector (as described by OpMap), return the index of
2756 // that source vector corresponding to Idx.
2757 ShuffleVectorSDNode *OpShuff = Idx < HwLen ? Shuff0 : Shuff1;
2758 if (Idx >= HwLen)
2759 Idx -= HwLen;
2760
2761 // Get the mask index that M points at in the corresponding operand.
2762 int MaybeN = OpShuff->getMaskElt(Idx);
2763 if (MaybeN < 0)
2764 return -1;
2765
2766 auto N = static_cast<unsigned>(MaybeN);
2767 unsigned SrcBase = N < HwLen ? OpMap.at(OpShuff->getOperand(0))
2768 : OpMap.at(OpShuff->getOperand(1));
2769 if (N >= HwLen)
2770 N -= HwLen;
2771
2772 return N + SrcBase;
2773 };
2774
2775 auto fold3 = [&](SDValue TopShuff, SDValue Inp, MapType &&OpMap) -> SDValue {
2776 // Fold all 3 shuffles into a single one.
2777 auto *This = cast<ShuffleVectorSDNode>(TopShuff);
2778 auto *S0 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(0));
2779 auto *S1 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(1));
2780 ArrayRef<int> TopMask = This->getMask();
2781 // This should be guaranteed by type checks in the caller, and the fact
2782 // that all shuffles should have been promoted to operate on MVT::i8.
2783 assert(TopMask.size() == S0->getMask().size() &&
2784 TopMask.size() == S1->getMask().size());
2785 assert(TopMask.size() == HwLen);
2786
2787 SmallVector<int, 256> FoldedMask(2 * HwLen);
2788 for (unsigned I = 0; I != HwLen; ++I) {
2789 int MaybeM = TopMask[I];
2790 if (MaybeM >= 0) {
2791 FoldedMask[I] =
2792 getMaskElt(static_cast<unsigned>(MaybeM), S0, S1, OpMap);
2793 } else {
2794 FoldedMask[I] = -1;
2795 }
2796 }
2797 // The second half of the result will be all-undef.
2798 std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1);
2799
2800 // Return
2801 // FoldedShuffle = (Shuffle Inp, undef, FoldedMask)
2802 // (LoHalf FoldedShuffle)
2803 const SDLoc &dl(TopShuff);
2804 MVT SingleTy = MVT::getVectorVT(MVT::i8, HwLen);
2805 MVT PairTy = MVT::getVectorVT(MVT::i8, 2 * HwLen);
2806 SDValue FoldedShuff =
2807 DAG.getVectorShuffle(PairTy, dl, DAG.getBitcast(PairTy, Inp),
2808 DAG.getUNDEF(PairTy), FoldedMask);
2809 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SingleTy, FoldedShuff,
2810 DAG.getConstant(0, dl, MVT::i32));
2811 };
2812
2813 auto getSourceInfo = [](SDValue V) -> std::optional<SubVectorInfo> {
2814 while (V.getOpcode() == ISD::BITCAST)
2815 V = V.getOperand(0);
2816 if (V.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2817 return std::nullopt;
2818 return SubVectorInfo(V.getOperand(0),
2819 !cast<ConstantSDNode>(V.getOperand(1))->isZero());
2820 };
2821
2822 for (SDNode *N : Nodes) {
2823 if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2824 continue;
2825 EVT ResTy = N->getValueType(0);
2826 if (ResTy.getVectorElementType() != MVT::i8)
2827 continue;
2828 if (ResTy.getVectorNumElements() != HwLen)
2829 continue;
2830
2831 SDValue V0 = N->getOperand(0);
2832 SDValue V1 = N->getOperand(1);
2833 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
2834 continue;
2835 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
2836 continue;
2837 if (V0.getValueType() != ResTy || V1.getValueType() != ResTy)
2838 continue;
2839
2840 // Check if all operands of the two operand shuffles are extract_subvectors
2841 // from the same vector pair.
2842 auto V0A = getSourceInfo(V0.getOperand(0));
2843 if (!V0A.has_value())
2844 continue;
2845 auto V0B = getSourceInfo(V0.getOperand(1));
2846 if (!V0B.has_value() || V0B->Src != V0A->Src)
2847 continue;
2848 auto V1A = getSourceInfo(V1.getOperand(0));
2849 if (!V1A.has_value() || V1A->Src != V0A->Src)
2850 continue;
2851 auto V1B = getSourceInfo(V1.getOperand(1));
2852 if (!V1B.has_value() || V1B->Src != V0A->Src)
2853 continue;
2854
2855 // The source must be a pair. This should be guaranteed here,
2856 // but check just in case.
2857 assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen);
2858
2859 MapType OpMap = {
2860 {V0.getOperand(0), V0A->HalfIdx * HwLen},
2861 {V0.getOperand(1), V0B->HalfIdx * HwLen},
2862 {V1.getOperand(0), V1A->HalfIdx * HwLen},
2863 {V1.getOperand(1), V1B->HalfIdx * HwLen},
2864 };
2865 SDValue NewS = fold3(SDValue(N, 0), V0A->Src, std::move(OpMap));
2866 ReplaceNode(N, NewS.getNode());
2867 }
2868}
2869
2870void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode *N) {
2871 HvxSelector(*this, *CurDAG).selectExtractSubvector(N);
2872}
2873
2874void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2875 HvxSelector(*this, *CurDAG).selectShuffle(N);
2876}
2877
2878void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2879 HvxSelector(*this, *CurDAG).selectRor(N);
2880}
2881
2882void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2883 HvxSelector(*this, *CurDAG).selectVAlign(N);
2884}
2885
2887 const SDLoc &dl(N);
2888 SDValue Chain = N->getOperand(0);
2889 SDValue Address = N->getOperand(2);
2890 SDValue Predicate = N->getOperand(3);
2891 SDValue Base = N->getOperand(4);
2892 SDValue Modifier = N->getOperand(5);
2893 SDValue Offset = N->getOperand(6);
2894 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2895
2896 unsigned Opcode;
2897 unsigned IntNo = N->getConstantOperandVal(1);
2898 switch (IntNo) {
2899 default:
2900 llvm_unreachable("Unexpected HVX gather intrinsic.");
2901 case Intrinsic::hexagon_V6_vgathermhq:
2902 case Intrinsic::hexagon_V6_vgathermhq_128B:
2903 Opcode = Hexagon::V6_vgathermhq_pseudo;
2904 break;
2905 case Intrinsic::hexagon_V6_vgathermwq:
2906 case Intrinsic::hexagon_V6_vgathermwq_128B:
2907 Opcode = Hexagon::V6_vgathermwq_pseudo;
2908 break;
2909 case Intrinsic::hexagon_V6_vgathermhwq:
2910 case Intrinsic::hexagon_V6_vgathermhwq_128B:
2911 Opcode = Hexagon::V6_vgathermhwq_pseudo;
2912 break;
2913 }
2914
2915 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2916 SDValue Ops[] = { Address, ImmOperand,
2917 Predicate, Base, Modifier, Offset, Chain };
2918 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2919
2920 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2921 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2922
2923 ReplaceNode(N, Result);
2924}
2925
2927 const SDLoc &dl(N);
2928 SDValue Chain = N->getOperand(0);
2929 SDValue Address = N->getOperand(2);
2930 SDValue Base = N->getOperand(3);
2931 SDValue Modifier = N->getOperand(4);
2932 SDValue Offset = N->getOperand(5);
2933 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2934
2935 unsigned Opcode;
2936 unsigned IntNo = N->getConstantOperandVal(1);
2937 switch (IntNo) {
2938 default:
2939 llvm_unreachable("Unexpected HVX gather intrinsic.");
2940 case Intrinsic::hexagon_V6_vgathermh:
2941 case Intrinsic::hexagon_V6_vgathermh_128B:
2942 Opcode = Hexagon::V6_vgathermh_pseudo;
2943 break;
2944 case Intrinsic::hexagon_V6_vgathermw:
2945 case Intrinsic::hexagon_V6_vgathermw_128B:
2946 Opcode = Hexagon::V6_vgathermw_pseudo;
2947 break;
2948 case Intrinsic::hexagon_V6_vgathermhw:
2949 case Intrinsic::hexagon_V6_vgathermhw_128B:
2950 Opcode = Hexagon::V6_vgathermhw_pseudo;
2951 break;
2952 }
2953
2954 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2955 SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
2956 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2957
2958 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2959 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2960
2961 ReplaceNode(N, Result);
2962}
2963
2965 unsigned IID = N->getConstantOperandVal(0);
2966 SDNode *Result;
2967 switch (IID) {
2968 case Intrinsic::hexagon_V6_vaddcarry: {
2969 std::array<SDValue, 3> Ops = {
2970 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2971 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2972 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2973 break;
2974 }
2975 case Intrinsic::hexagon_V6_vaddcarry_128B: {
2976 std::array<SDValue, 3> Ops = {
2977 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2978 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2979 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2980 break;
2981 }
2982 case Intrinsic::hexagon_V6_vsubcarry: {
2983 std::array<SDValue, 3> Ops = {
2984 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2985 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2986 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2987 break;
2988 }
2989 case Intrinsic::hexagon_V6_vsubcarry_128B: {
2990 std::array<SDValue, 3> Ops = {
2991 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2992 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2993 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2994 break;
2995 }
2996 default:
2997 llvm_unreachable("Unexpected HVX dual output intrinsic.");
2998 }
2999 ReplaceUses(N, Result);
3000 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3001 ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
3003}
unsigned SubReg
static const LLT S1
ReachingDefAnalysis InstSet InstSet & Ignore
Function Alias Analysis Results
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
This file implements the BitVector class.
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:529
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static bool isIdentity(ArrayRef< int > Mask)
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
static bool isUndef(ArrayRef< int > Mask)
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
static bool isLowHalfOnly(ArrayRef< int > Mask)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
nvptx lower args
#define P(N)
const NodeList & List
Definition: RDFGraph.cpp:201
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
Stack Slot Coloring
@ None
xray Insert XRay ops
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:228
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:235
const T * data() const
Definition: ArrayRef.h:162
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
Tagged union holding either a T or a Error.
Definition: Error.h:474
bool empty() const
Definition: Function.h:804
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4789
size_t size() const
Definition: Function.h:803
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
unsigned getVectorLength() const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Machine Value Type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
A description of a memory reference used in the backend.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
iterator begin()
bool insert(SUnit *SU)
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< use_iterator > uses()
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
const SDValue & getOperand(unsigned Num) const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:539
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition: SetVector.h:87
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void resize(size_type N)
Definition: SmallVector.h:651
void push_back(const T &Elt)
Definition: SmallVector.h:426
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:299
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
TargetInstrInfo - Interface to description of machine instruction set.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ Invalid
Invalid file type.
Definition: FileTypes.h:17
@ Undef
Value of the register doesn't matter.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:31
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:227
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:456
@ Length
Definition: DWP.cpp:456
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1731
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:385
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
@ Done
Definition: Threading.h:61
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2068
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1656
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
@ Invalid
Denotes invalid value.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:436
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1930
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1888
MaskT vshuffvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
MaskT vpack(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > hi(ArrayRef< int > Vuu)
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdealb4w(ArrayRef< int > Vu, ArrayRef< int > Vv)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > lo(ArrayRef< int > Vuu)
MaskT vdealvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Extended Value Type.
Definition: ValueTypes.h:34
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
MVT getSingleVT(MVT ElemTy) const
static SmallVector< uint32_t, 8 > completeToPerfect(ArrayRef< uint32_t > Completions, unsigned Width)
HexagonDAGToDAGISel & ISel
const HexagonTargetLowering & Lower
void selectExtractSubvector(SDNode *N)
static SmallVector< uint32_t, 8 > getPerfectCompletions(ShuffleMask SM, unsigned Width)
MVT getPairVT(MVT ElemTy) const
const HexagonSubtarget & HST
static std::optional< int > rotationDistance(ShuffleMask SM, unsigned WrapAt)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
std::size_t operator()(SDValue V) const