LLVM 18.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "Hexagon.h"
10#include "HexagonISelDAGToDAG.h"
11#include "HexagonISelLowering.h"
13#include "llvm/ADT/BitVector.h"
14#include "llvm/ADT/SetVector.h"
17#include "llvm/IR/Intrinsics.h"
18#include "llvm/IR/IntrinsicsHexagon.h"
20#include "llvm/Support/Debug.h"
22
23#include <algorithm>
24#include <cmath>
25#include <deque>
26#include <functional>
27#include <map>
28#include <optional>
29#include <set>
30#include <unordered_map>
31#include <utility>
32#include <vector>
33
34#define DEBUG_TYPE "hexagon-isel"
35using namespace llvm;
36
37namespace {
38
39// --------------------------------------------------------------------
40// Implementation of permutation networks.
41
42// Implementation of the node routing through butterfly networks:
43// - Forward delta.
44// - Reverse delta.
45// - Benes.
46//
47//
48// Forward delta network consists of log(N) steps, where N is the number
49// of inputs. In each step, an input can stay in place, or it can get
50// routed to another position[1]. The step after that consists of two
51// networks, each half in size in terms of the number of nodes. In those
52// terms, in the given step, an input can go to either the upper or the
53// lower network in the next step.
54//
55// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
56// positions as long as there is no conflict.
57
58// Here's a delta network for 8 inputs, only the switching routes are
59// shown:
60//
61// Steps:
62// |- 1 ---------------|- 2 -----|- 3 -|
63//
64// Inp[0] *** *** *** *** Out[0]
65// \ / \ / \ /
66// \ / \ / X
67// \ / \ / / \
68// Inp[1] *** \ / *** X *** *** Out[1]
69// \ \ / / \ / \ /
70// \ \ / / X X
71// \ \ / / / \ / \
72// Inp[2] *** \ \ / / *** X *** *** Out[2]
73// \ \ X / / / \ \ /
74// \ \ / \ / / / \ X
75// \ X X / / \ / \
76// Inp[3] *** \ / \ / \ / *** *** *** Out[3]
77// \ X X X /
78// \ / \ / \ / \ /
79// X X X X
80// / \ / \ / \ / \
81// / X X X \
82// Inp[4] *** / \ / \ / \ *** *** *** Out[4]
83// / X X \ \ / \ /
84// / / \ / \ \ \ / X
85// / / X \ \ \ / / \
86// Inp[5] *** / / \ \ *** X *** *** Out[5]
87// / / \ \ \ / \ /
88// / / \ \ X X
89// / / \ \ / \ / \
90// Inp[6] *** / \ *** X *** *** Out[6]
91// / \ / \ \ /
92// / \ / \ X
93// / \ / \ / \
94// Inp[7] *** *** *** *** Out[7]
95//
96//
97// Reverse delta network is same as delta network, with the steps in
98// the opposite order.
99//
100//
101// Benes network is a forward delta network immediately followed by
102// a reverse delta network.
103
104enum class ColorKind { None, Red, Black };
105
106// Graph coloring utility used to partition nodes into two groups:
107// they will correspond to nodes routed to the upper and lower networks.
108struct Coloring {
109 using Node = int;
110 using MapType = std::map<Node, ColorKind>;
111 static constexpr Node Ignore = Node(-1);
112
113 Coloring(ArrayRef<Node> Ord) : Order(Ord) {
114 build();
115 if (!color())
116 Colors.clear();
117 }
118
119 const MapType &colors() const {
120 return Colors;
121 }
122
123 ColorKind other(ColorKind Color) {
124 if (Color == ColorKind::None)
125 return ColorKind::Red;
126 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
127 }
128
129 LLVM_DUMP_METHOD void dump() const;
130
131private:
132 ArrayRef<Node> Order;
133 MapType Colors;
134 std::set<Node> Needed;
135
136 using NodeSet = std::set<Node>;
137 std::map<Node,NodeSet> Edges;
138
139 Node conj(Node Pos) {
140 Node Num = Order.size();
141 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
142 }
143
144 ColorKind getColor(Node N) {
145 auto F = Colors.find(N);
146 return F != Colors.end() ? F->second : ColorKind::None;
147 }
148
149 std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
150
151 void build();
152 bool color();
153};
154} // namespace
155
156std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
157 auto Color = ColorKind::None;
158 for (Node N : Nodes) {
159 ColorKind ColorN = getColor(N);
160 if (ColorN == ColorKind::None)
161 continue;
162 if (Color == ColorKind::None)
163 Color = ColorN;
164 else if (Color != ColorKind::None && Color != ColorN)
165 return { false, ColorKind::None };
166 }
167 return { true, Color };
168}
169
170void Coloring::build() {
171 // Add Order[P] and Order[conj(P)] to Edges.
172 for (unsigned P = 0; P != Order.size(); ++P) {
173 Node I = Order[P];
174 if (I != Ignore) {
175 Needed.insert(I);
176 Node PC = Order[conj(P)];
177 if (PC != Ignore && PC != I)
178 Edges[I].insert(PC);
179 }
180 }
181 // Add I and conj(I) to Edges.
182 for (unsigned I = 0; I != Order.size(); ++I) {
183 if (!Needed.count(I))
184 continue;
185 Node C = conj(I);
186 // This will create an entry in the edge table, even if I is not
187 // connected to any other node. This is necessary, because it still
188 // needs to be colored.
189 NodeSet &Is = Edges[I];
190 if (Needed.count(C))
191 Is.insert(C);
192 }
193}
194
195bool Coloring::color() {
196 SetVector<Node> FirstQ;
197 auto Enqueue = [this,&FirstQ] (Node N) {
199 Q.insert(N);
200 for (unsigned I = 0; I != Q.size(); ++I) {
201 NodeSet &Ns = Edges[Q[I]];
202 Q.insert(Ns.begin(), Ns.end());
203 }
204 FirstQ.insert(Q.begin(), Q.end());
205 };
206 for (Node N : Needed)
207 Enqueue(N);
208
209 for (Node N : FirstQ) {
210 if (Colors.count(N))
211 continue;
212 NodeSet &Ns = Edges[N];
213 auto P = getUniqueColor(Ns);
214 if (!P.first)
215 return false;
216 Colors[N] = other(P.second);
217 }
218
219 // First, color nodes that don't have any dups.
220 for (auto E : Edges) {
221 Node N = E.first;
222 if (!Needed.count(conj(N)) || Colors.count(N))
223 continue;
224 auto P = getUniqueColor(E.second);
225 if (!P.first)
226 return false;
227 Colors[N] = other(P.second);
228 }
229
230 // Now, nodes that are still uncolored. Since the graph can be modified
231 // in this step, create a work queue.
232 std::vector<Node> WorkQ;
233 for (auto E : Edges) {
234 Node N = E.first;
235 if (!Colors.count(N))
236 WorkQ.push_back(N);
237 }
238
239 for (Node N : WorkQ) {
240 NodeSet &Ns = Edges[N];
241 auto P = getUniqueColor(Ns);
242 if (P.first) {
243 Colors[N] = other(P.second);
244 continue;
245 }
246
247 // Coloring failed. Split this node.
248 Node C = conj(N);
249 ColorKind ColorN = other(ColorKind::None);
250 ColorKind ColorC = other(ColorN);
251 NodeSet &Cs = Edges[C];
252 NodeSet CopyNs = Ns;
253 for (Node M : CopyNs) {
254 ColorKind ColorM = getColor(M);
255 if (ColorM == ColorC) {
256 // Connect M with C, disconnect M from N.
257 Cs.insert(M);
258 Edges[M].insert(C);
259 Ns.erase(M);
260 Edges[M].erase(N);
261 }
262 }
263 Colors[N] = ColorN;
264 Colors[C] = ColorC;
265 }
266
267 // Explicitly assign "None" to all uncolored nodes.
268 for (unsigned I = 0; I != Order.size(); ++I)
269 if (Colors.count(I) == 0)
270 Colors[I] = ColorKind::None;
271
272 return true;
273}
274
275#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
276void Coloring::dump() const {
277 dbgs() << "{ Order: {";
278 for (Node P : Order) {
279 if (P != Ignore)
280 dbgs() << ' ' << P;
281 else
282 dbgs() << " -";
283 }
284 dbgs() << " }\n";
285 dbgs() << " Needed: {";
286 for (Node N : Needed)
287 dbgs() << ' ' << N;
288 dbgs() << " }\n";
289
290 dbgs() << " Edges: {\n";
291 for (auto E : Edges) {
292 dbgs() << " " << E.first << " -> {";
293 for (auto N : E.second)
294 dbgs() << ' ' << N;
295 dbgs() << " }\n";
296 }
297 dbgs() << " }\n";
298
299 auto ColorKindToName = [](ColorKind C) {
300 switch (C) {
301 case ColorKind::None:
302 return "None";
303 case ColorKind::Red:
304 return "Red";
305 case ColorKind::Black:
306 return "Black";
307 }
308 llvm_unreachable("all ColorKinds should be handled by the switch above");
309 };
310
311 dbgs() << " Colors: {\n";
312 for (auto C : Colors)
313 dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
314 dbgs() << " }\n}\n";
315}
316#endif
317
318namespace {
319// Base class of for reordering networks. They don't strictly need to be
320// permutations, as outputs with repeated occurrences of an input element
321// are allowed.
322struct PermNetwork {
323 using Controls = std::vector<uint8_t>;
324 using ElemType = int;
325 static constexpr ElemType Ignore = ElemType(-1);
326
327 enum : uint8_t {
328 None,
329 Pass,
330 Switch
331 };
332 enum : uint8_t {
333 Forward,
334 Reverse
335 };
336
337 PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
338 Order.assign(Ord.data(), Ord.data()+Ord.size());
339 Log = 0;
340
341 unsigned S = Order.size();
342 while (S >>= 1)
343 ++Log;
344
345 Table.resize(Order.size());
346 for (RowType &Row : Table)
347 Row.resize(Mult*Log, None);
348 }
349
350 void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
351 unsigned Size = Order.size();
352 V.resize(Size);
353 for (unsigned I = 0; I != Size; ++I) {
354 unsigned W = 0;
355 for (unsigned L = 0; L != Log; ++L) {
356 unsigned C = ctl(I, StartAt+L) == Switch;
357 if (Dir == Forward)
358 W |= C << (Log-1-L);
359 else
360 W |= C << L;
361 }
362 assert(isUInt<8>(W));
363 V[I] = uint8_t(W);
364 }
365 }
366
367 uint8_t ctl(ElemType Pos, unsigned Step) const {
368 return Table[Pos][Step];
369 }
370 unsigned size() const {
371 return Order.size();
372 }
373 unsigned steps() const {
374 return Log;
375 }
376
377protected:
378 unsigned Log;
379 std::vector<ElemType> Order;
380 using RowType = std::vector<uint8_t>;
381 std::vector<RowType> Table;
382};
383
384struct ForwardDeltaNetwork : public PermNetwork {
385 ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
386
387 bool run(Controls &V) {
388 if (!route(Order.data(), Table.data(), size(), 0))
389 return false;
390 getControls(V, 0, Forward);
391 return true;
392 }
393
394private:
395 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
396};
397
398struct ReverseDeltaNetwork : public PermNetwork {
399 ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
400
401 bool run(Controls &V) {
402 if (!route(Order.data(), Table.data(), size(), 0))
403 return false;
404 getControls(V, 0, Reverse);
405 return true;
406 }
407
408private:
409 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
410};
411
412struct BenesNetwork : public PermNetwork {
413 BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
414
415 bool run(Controls &F, Controls &R) {
416 if (!route(Order.data(), Table.data(), size(), 0))
417 return false;
418
419 getControls(F, 0, Forward);
420 getControls(R, Log, Reverse);
421 return true;
422 }
423
424private:
425 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
426};
427} // namespace
428
429bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
430 unsigned Step) {
431 bool UseUp = false, UseDown = false;
432 ElemType Num = Size;
433
434 // Cannot use coloring here, because coloring is used to determine
435 // the "big" switch, i.e. the one that changes halves, and in a forward
436 // network, a color can be simultaneously routed to both halves in the
437 // step we're working on.
438 for (ElemType J = 0; J != Num; ++J) {
439 ElemType I = P[J];
440 // I is the position in the input,
441 // J is the position in the output.
442 if (I == Ignore)
443 continue;
444 uint8_t S;
445 if (I < Num/2)
446 S = (J < Num/2) ? Pass : Switch;
447 else
448 S = (J < Num/2) ? Switch : Pass;
449
450 // U is the element in the table that needs to be updated.
451 ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
452 if (U < Num/2)
453 UseUp = true;
454 else
455 UseDown = true;
456 if (T[U][Step] != S && T[U][Step] != None)
457 return false;
458 T[U][Step] = S;
459 }
460
461 for (ElemType J = 0; J != Num; ++J)
462 if (P[J] != Ignore && P[J] >= Num/2)
463 P[J] -= Num/2;
464
465 if (Step+1 < Log) {
466 if (UseUp && !route(P, T, Size/2, Step+1))
467 return false;
468 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
469 return false;
470 }
471 return true;
472}
473
474bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
475 unsigned Step) {
476 unsigned Pets = Log-1 - Step;
477 bool UseUp = false, UseDown = false;
478 ElemType Num = Size;
479
480 // In this step half-switching occurs, so coloring can be used.
481 Coloring G({P,Size});
482 const Coloring::MapType &M = G.colors();
483 if (M.empty())
484 return false;
485
486 ColorKind ColorUp = ColorKind::None;
487 for (ElemType J = 0; J != Num; ++J) {
488 ElemType I = P[J];
489 // I is the position in the input,
490 // J is the position in the output.
491 if (I == Ignore)
492 continue;
493 ColorKind C = M.at(I);
494 if (C == ColorKind::None)
495 continue;
496 // During "Step", inputs cannot switch halves, so if the "up" color
497 // is still unknown, make sure that it is selected in such a way that
498 // "I" will stay in the same half.
499 bool InpUp = I < Num/2;
500 if (ColorUp == ColorKind::None)
501 ColorUp = InpUp ? C : G.other(C);
502 if ((C == ColorUp) != InpUp) {
503 // If I should go to a different half than where is it now, give up.
504 return false;
505 }
506
507 uint8_t S;
508 if (InpUp) {
509 S = (J < Num/2) ? Pass : Switch;
510 UseUp = true;
511 } else {
512 S = (J < Num/2) ? Switch : Pass;
513 UseDown = true;
514 }
515 T[J][Pets] = S;
516 }
517
518 // Reorder the working permutation according to the computed switch table
519 // for the last step (i.e. Pets).
520 for (ElemType J = 0, E = Size / 2; J != E; ++J) {
521 ElemType PJ = P[J]; // Current values of P[J]
522 ElemType PC = P[J+Size/2]; // and P[conj(J)]
523 ElemType QJ = PJ; // New values of P[J]
524 ElemType QC = PC; // and P[conj(J)]
525 if (T[J][Pets] == Switch)
526 QC = PJ;
527 if (T[J+Size/2][Pets] == Switch)
528 QJ = PC;
529 P[J] = QJ;
530 P[J+Size/2] = QC;
531 }
532
533 for (ElemType J = 0; J != Num; ++J)
534 if (P[J] != Ignore && P[J] >= Num/2)
535 P[J] -= Num/2;
536
537 if (Step+1 < Log) {
538 if (UseUp && !route(P, T, Size/2, Step+1))
539 return false;
540 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
541 return false;
542 }
543 return true;
544}
545
546bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
547 unsigned Step) {
548 Coloring G({P,Size});
549 const Coloring::MapType &M = G.colors();
550 if (M.empty())
551 return false;
552 ElemType Num = Size;
553
554 unsigned Pets = 2*Log-1 - Step;
555 bool UseUp = false, UseDown = false;
556
557 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
558 // result in different controls. Let's pick the one where the first
559 // control will be "Pass".
560 ColorKind ColorUp = ColorKind::None;
561 for (ElemType J = 0; J != Num; ++J) {
562 ElemType I = P[J];
563 if (I == Ignore)
564 continue;
565 ColorKind C = M.at(I);
566 if (C == ColorKind::None)
567 continue;
568 if (ColorUp == ColorKind::None) {
569 ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
570 }
571 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
572 if (C == ColorUp) {
573 if (I < Num/2)
574 T[I][Step] = Pass;
575 else
576 T[CI][Step] = Switch;
577 T[J][Pets] = (J < Num/2) ? Pass : Switch;
578 UseUp = true;
579 } else { // Down
580 if (I < Num/2)
581 T[CI][Step] = Switch;
582 else
583 T[I][Step] = Pass;
584 T[J][Pets] = (J < Num/2) ? Switch : Pass;
585 UseDown = true;
586 }
587 }
588
589 // Reorder the working permutation according to the computed switch table
590 // for the last step (i.e. Pets).
591 for (ElemType J = 0; J != Num/2; ++J) {
592 ElemType PJ = P[J]; // Current values of P[J]
593 ElemType PC = P[J+Num/2]; // and P[conj(J)]
594 ElemType QJ = PJ; // New values of P[J]
595 ElemType QC = PC; // and P[conj(J)]
596 if (T[J][Pets] == Switch)
597 QC = PJ;
598 if (T[J+Num/2][Pets] == Switch)
599 QJ = PC;
600 P[J] = QJ;
601 P[J+Num/2] = QC;
602 }
603
604 for (ElemType J = 0; J != Num; ++J)
605 if (P[J] != Ignore && P[J] >= Num/2)
606 P[J] -= Num/2;
607
608 if (Step+1 < Log) {
609 if (UseUp && !route(P, T, Size/2, Step+1))
610 return false;
611 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
612 return false;
613 }
614 return true;
615}
616
617// --------------------------------------------------------------------
618// Support for building selection results (output instructions that are
619// parts of the final selection).
620
621namespace {
622struct OpRef {
623 OpRef(SDValue V) : OpV(V) {}
624 bool isValue() const { return OpV.getNode() != nullptr; }
625 bool isValid() const { return isValue() || !(OpN & Invalid); }
626 bool isUndef() const { return OpN & Undef; }
627 static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
628 static OpRef fail() { return OpRef(Invalid); }
629
630 static OpRef lo(const OpRef &R) {
631 assert(!R.isValue());
632 return OpRef(R.OpN & (Undef | Index | LoHalf));
633 }
634 static OpRef hi(const OpRef &R) {
635 assert(!R.isValue());
636 return OpRef(R.OpN & (Undef | Index | HiHalf));
637 }
638 static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
639
640 // Direct value.
641 SDValue OpV = SDValue();
642
643 // Reference to the operand of the input node:
644 // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
645 // operand index:
646 // If bit 30 is set, it's the high half of the operand.
647 // If bit 29 is set, it's the low half of the operand.
648 unsigned OpN = 0;
649
650 enum : unsigned {
651 Invalid = 0x10000000,
652 LoHalf = 0x20000000,
653 HiHalf = 0x40000000,
654 Whole = LoHalf | HiHalf,
655 Undef = 0x80000000,
656 Index = 0x0FFFFFFF, // Mask of the index value.
657 IndexBits = 28,
658 };
659
661 void print(raw_ostream &OS, const SelectionDAG &G) const;
662
663private:
664 OpRef(unsigned N) : OpN(N) {}
665};
666
667struct NodeTemplate {
668 NodeTemplate() = default;
669 unsigned Opc = 0;
670 MVT Ty = MVT::Other;
671 std::vector<OpRef> Ops;
672
673 LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
674};
675
676struct ResultStack {
677 ResultStack(SDNode *Inp)
678 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
679 SDNode *InpNode;
680 MVT InpTy;
681 unsigned push(const NodeTemplate &Res) {
682 List.push_back(Res);
683 return List.size()-1;
684 }
685 unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
686 NodeTemplate Res;
687 Res.Opc = Opc;
688 Res.Ty = Ty;
689 Res.Ops = Ops;
690 return push(Res);
691 }
692 bool empty() const { return List.empty(); }
693 unsigned size() const { return List.size(); }
694 unsigned top() const { return size()-1; }
695 const NodeTemplate &operator[](unsigned I) const { return List[I]; }
696 unsigned reset(unsigned NewTop) {
697 List.resize(NewTop+1);
698 return NewTop;
699 }
700
701 using BaseType = std::vector<NodeTemplate>;
702 BaseType::iterator begin() { return List.begin(); }
703 BaseType::iterator end() { return List.end(); }
704 BaseType::const_iterator begin() const { return List.begin(); }
705 BaseType::const_iterator end() const { return List.end(); }
706
708
710 void print(raw_ostream &OS, const SelectionDAG &G) const;
711};
712} // namespace
713
714#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
715void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
716 if (isValue()) {
717 OpV.getNode()->print(OS, &G);
718 return;
719 }
720 if (OpN & Invalid) {
721 OS << "invalid";
722 return;
723 }
724 if (OpN & Undef) {
725 OS << "undef";
726 return;
727 }
728 if ((OpN & Whole) != Whole) {
729 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
730 if (OpN & LoHalf)
731 OS << "lo ";
732 else
733 OS << "hi ";
734 }
735 OS << '#' << SignExtend32(OpN & Index, IndexBits);
736}
737
738void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
739 const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
740 OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
741 << TII.getName(Opc);
742 bool Comma = false;
743 for (const auto &R : Ops) {
744 if (Comma)
745 OS << ',';
746 Comma = true;
747 OS << ' ';
748 R.print(OS, G);
749 }
750}
751
752void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
753 OS << "Input node:\n";
754#ifndef NDEBUG
755 InpNode->dumpr(&G);
756#endif
757 OS << "Result templates:\n";
758 for (unsigned I = 0, E = List.size(); I != E; ++I) {
759 OS << '[' << I << "] ";
760 List[I].print(OS, G);
761 OS << '\n';
762 }
763}
764#endif
765
766namespace {
767struct ShuffleMask {
768 ShuffleMask(ArrayRef<int> M) : Mask(M) {
769 for (int M : Mask) {
770 if (M == -1)
771 continue;
772 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
773 MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
774 }
775 }
776
778 int MinSrc = -1, MaxSrc = -1;
779
780 ShuffleMask lo() const {
781 size_t H = Mask.size()/2;
782 return ShuffleMask(Mask.take_front(H));
783 }
784 ShuffleMask hi() const {
785 size_t H = Mask.size()/2;
786 return ShuffleMask(Mask.take_back(H));
787 }
788
789 void print(raw_ostream &OS) const {
790 OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
791 for (int M : Mask)
792 OS << ' ' << M;
793 OS << " }";
794 }
795};
796
798raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
799 SM.print(OS);
800 return OS;
801}
802} // namespace
803
804namespace shuffles {
806// Vdd = vshuffvdd(Vu, Vv, Rt)
807// Vdd = vdealvdd(Vu, Vv, Rt)
808// Vd = vpack(Vu, Vv, Size, TakeOdd)
809// Vd = vshuff(Vu, Vv, Size, TakeOdd)
810// Vd = vdeal(Vu, Vv, Size, TakeOdd)
811// Vd = vdealb4w(Vu, Vv)
812
813ArrayRef<int> lo(ArrayRef<int> Vuu) { return Vuu.take_front(Vuu.size() / 2); }
814ArrayRef<int> hi(ArrayRef<int> Vuu) { return Vuu.take_back(Vuu.size() / 2); }
815
817 int Len = Vu.size();
818 MaskT Vdd(2 * Len);
819 std::copy(Vv.begin(), Vv.end(), Vdd.begin());
820 std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
821
822 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
823 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
824
825 for (int Offset = 1; Offset < Len; Offset *= 2) {
826 if ((Rt & Offset) == 0)
827 continue;
828 for (int i = 0; i != Len; ++i) {
829 if ((i & Offset) == 0)
830 std::swap(Vd1[i], Vd0[i + Offset]);
831 }
832 }
833 return Vdd;
834}
835
837 int Len = Vu.size();
838 MaskT Vdd(2 * Len);
839 std::copy(Vv.begin(), Vv.end(), Vdd.begin());
840 std::copy(Vu.begin(), Vu.end(), Vdd.begin() + Len);
841
842 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
843 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
844
845 for (int Offset = Len / 2; Offset > 0; Offset /= 2) {
846 if ((Rt & Offset) == 0)
847 continue;
848 for (int i = 0; i != Len; ++i) {
849 if ((i & Offset) == 0)
850 std::swap(Vd1[i], Vd0[i + Offset]);
851 }
852 }
853 return Vdd;
854}
855
856MaskT vpack(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
857 int Len = Vu.size();
858 MaskT Vd(Len);
859 auto Odd = static_cast<int>(TakeOdd);
860 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
861 for (int b = 0; b != static_cast<int>(Size); ++b) {
862 // clang-format off
863 Vd[i * Size + b] = Vv[(2 * i + Odd) * Size + b];
864 Vd[i * Size + b + Len / 2] = Vu[(2 * i + Odd) * Size + b];
865 // clang-format on
866 }
867 }
868 return Vd;
869}
870
871MaskT vshuff(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
872 int Len = Vu.size();
873 MaskT Vd(Len);
874 auto Odd = static_cast<int>(TakeOdd);
875 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
876 for (int b = 0; b != static_cast<int>(Size); ++b) {
877 Vd[(2 * i + 0) * Size + b] = Vv[(2 * i + Odd) * Size + b];
878 Vd[(2 * i + 1) * Size + b] = Vu[(2 * i + Odd) * Size + b];
879 }
880 }
881 return Vd;
882}
883
884MaskT vdeal(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
885 int Len = Vu.size();
886 MaskT T = vdealvdd(Vu, Vv, Len - 2 * Size);
887 return vpack(hi(T), lo(T), Size, TakeOdd);
888}
889
891 int Len = Vu.size();
892 MaskT Vd(Len);
893 for (int i = 0, e = Len / 4; i != e; ++i) {
894 Vd[0 * (Len / 4) + i] = Vv[4 * i + 0];
895 Vd[1 * (Len / 4) + i] = Vv[4 * i + 2];
896 Vd[2 * (Len / 4) + i] = Vu[4 * i + 0];
897 Vd[3 * (Len / 4) + i] = Vu[4 * i + 2];
898 }
899 return Vd;
900}
901
902template <typename ShuffFunc, typename... OptArgs>
903auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT {
904 MaskT Vu(Length), Vv(Length);
905 std::iota(Vu.begin(), Vu.end(), Length); // High
906 std::iota(Vv.begin(), Vv.end(), 0); // Low
907 return S(Vu, Vv, args...);
908}
909
910} // namespace shuffles
911
912// --------------------------------------------------------------------
913// The HvxSelector class.
914
916 return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
917}
919 return G.getSubtarget<HexagonSubtarget>();
920}
921
922namespace llvm {
923 struct HvxSelector {
928 const unsigned HwLen;
929
931 : Lower(getHexagonLowering(G)), ISel(HS), DAG(G),
932 HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
933
934 MVT getSingleVT(MVT ElemTy) const {
935 assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
936 unsigned NumElems = HwLen / (ElemTy.getSizeInBits() / 8);
937 return MVT::getVectorVT(ElemTy, NumElems);
938 }
939
940 MVT getPairVT(MVT ElemTy) const {
941 assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
942 unsigned NumElems = (2 * HwLen) / (ElemTy.getSizeInBits() / 8);
943 return MVT::getVectorVT(ElemTy, NumElems);
944 }
945
946 MVT getBoolVT() const {
947 // Return HwLen x i1.
948 return MVT::getVectorVT(MVT::i1, HwLen);
949 }
950
952 void selectShuffle(SDNode *N);
953 void selectRor(SDNode *N);
954 void selectVAlign(SDNode *N);
955
957 unsigned Width);
959 ArrayRef<uint32_t> Completions, unsigned Width);
960 static std::optional<int> rotationDistance(ShuffleMask SM, unsigned WrapAt);
961
962 private:
963 void select(SDNode *ISelN);
964 void materialize(const ResultStack &Results);
965
966 SDValue getConst32(int Val, const SDLoc &dl);
967 SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
968
969 enum : unsigned {
970 None,
971 PackMux,
972 };
973 OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
974 OpRef funnels(OpRef Va, OpRef Vb, int Amount, ResultStack &Results);
975
976 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
977 MutableArrayRef<int> NewMask, unsigned Options = None);
978 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
979 MutableArrayRef<int> NewMask);
980 OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
981 ResultStack &Results);
982 OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
983 ResultStack &Results);
984
985 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
986 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
987 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
988 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
989
990 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
991 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
992 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
993 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
994
995 bool selectVectorConstants(SDNode *N);
996 bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
997 SDValue Va, SDValue Vb, SDNode *N);
998 };
999} // namespace llvm
1000
1002 MutableArrayRef<int> MaskR) {
1003 unsigned VecLen = Mask.size();
1004 assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
1005 for (unsigned I = 0; I != VecLen; ++I) {
1006 int M = Mask[I];
1007 if (M < 0) {
1008 MaskL[I] = MaskR[I] = -1;
1009 } else if (unsigned(M) < VecLen) {
1010 MaskL[I] = M;
1011 MaskR[I] = -1;
1012 } else {
1013 MaskL[I] = -1;
1014 MaskR[I] = M-VecLen;
1015 }
1016 }
1017}
1018
1019static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
1020 unsigned MaxLen) {
1021 assert(A.size() > 0 && A.size() >= MaxLen);
1022 int F = A[0];
1023 int E = F;
1024 for (unsigned I = 1; I != MaxLen; ++I) {
1025 if (A[I] - E != Inc)
1026 return { F, I };
1027 E = A[I];
1028 }
1029 return { F, MaxLen };
1030}
1031
1032static bool isUndef(ArrayRef<int> Mask) {
1033 for (int Idx : Mask)
1034 if (Idx != -1)
1035 return false;
1036 return true;
1037}
1038
1039static bool isIdentity(ArrayRef<int> Mask) {
1040 for (int I = 0, E = Mask.size(); I != E; ++I) {
1041 int M = Mask[I];
1042 if (M >= 0 && M != I)
1043 return false;
1044 }
1045 return true;
1046}
1047
1048static bool isLowHalfOnly(ArrayRef<int> Mask) {
1049 int L = Mask.size();
1050 assert(L % 2 == 0);
1051 // Check if the second half of the mask is all-undef.
1052 return llvm::all_of(Mask.drop_front(L / 2), [](int M) { return M < 0; });
1053}
1054
1056 unsigned SegLen) {
1057 assert(isPowerOf2_32(SegLen));
1059 if (SM.MaxSrc == -1)
1060 return SegList;
1061
1062 unsigned Shift = Log2_32(SegLen);
1063 BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
1064
1065 for (int M : SM.Mask) {
1066 if (M >= 0)
1067 Segs.set(M >> Shift);
1068 }
1069
1070 for (unsigned B : Segs.set_bits())
1071 SegList.push_back(B);
1072 return SegList;
1073}
1074
1076 unsigned SegLen) {
1077 // Calculate the layout of the output segments in terms of the input
1078 // segments.
1079 // For example [1,3,1,0] means that the output consists of 4 output
1080 // segments, where the first output segment has only elements of the
1081 // input segment at index 1. The next output segment only has elements
1082 // of the input segment 3, etc.
1083 // If an output segment only has undef elements, the value will be ~0u.
1084 // If an output segment has elements from more than one input segment,
1085 // the corresponding value will be ~1u.
1086 unsigned MaskLen = SM.Mask.size();
1087 assert(MaskLen % SegLen == 0);
1088 SmallVector<unsigned, 4> Map(MaskLen / SegLen);
1089
1090 for (int S = 0, E = Map.size(); S != E; ++S) {
1091 unsigned Idx = ~0u;
1092 for (int I = 0; I != static_cast<int>(SegLen); ++I) {
1093 int M = SM.Mask[S*SegLen + I];
1094 if (M < 0)
1095 continue;
1096 unsigned G = M / SegLen; // Input segment of this element.
1097 if (Idx == ~0u) {
1098 Idx = G;
1099 } else if (Idx != G) {
1100 Idx = ~1u;
1101 break;
1102 }
1103 }
1104 Map[S] = Idx;
1105 }
1106
1107 return Map;
1108}
1109
1111 unsigned SegLen, MutableArrayRef<int> PackedMask) {
1113 for (int I = OutSegMap.size() - 1; I >= 0; --I) {
1114 unsigned S = OutSegMap[I];
1115 assert(S != ~0u && "Unexpected undef");
1116 assert(S != ~1u && "Unexpected multi");
1117 if (InvMap.size() <= S)
1118 InvMap.resize(S+1);
1119 InvMap[S] = I;
1120 }
1121
1122 unsigned Shift = Log2_32(SegLen);
1123 for (int I = 0, E = Mask.size(); I != E; ++I) {
1124 int M = Mask[I];
1125 if (M >= 0) {
1126 int OutIdx = InvMap[M >> Shift];
1127 M = (M & (SegLen-1)) + SegLen*OutIdx;
1128 }
1129 PackedMask[I] = M;
1130 }
1131}
1132
1133bool HvxSelector::selectVectorConstants(SDNode *N) {
1134 // Constant vectors are generated as loads from constant pools or as
1135 // splats of a constant value. Since they are generated during the
1136 // selection process, the main selection algorithm is not aware of them.
1137 // Select them directly here.
1139 SetVector<SDNode*> WorkQ;
1140
1141 // The DAG can change (due to CSE) during selection, so cache all the
1142 // unselected nodes first to avoid traversing a mutating DAG.
1143 WorkQ.insert(N);
1144 for (unsigned i = 0; i != WorkQ.size(); ++i) {
1145 SDNode *W = WorkQ[i];
1146 if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1147 Nodes.push_back(W);
1148 for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1149 WorkQ.insert(W->getOperand(j).getNode());
1150 }
1151
1152 for (SDNode *L : Nodes)
1153 select(L);
1154
1155 return !Nodes.empty();
1156}
1157
1158void HvxSelector::materialize(const ResultStack &Results) {
1159 DEBUG_WITH_TYPE("isel", {
1160 dbgs() << "Materializing\n";
1161 Results.print(dbgs(), DAG);
1162 });
1163 if (Results.empty())
1164 return;
1165 const SDLoc &dl(Results.InpNode);
1166 std::vector<SDValue> Output;
1167
1168 for (unsigned I = 0, E = Results.size(); I != E; ++I) {
1169 const NodeTemplate &Node = Results[I];
1170 std::vector<SDValue> Ops;
1171 for (const OpRef &R : Node.Ops) {
1172 assert(R.isValid());
1173 if (R.isValue()) {
1174 Ops.push_back(R.OpV);
1175 continue;
1176 }
1177 if (R.OpN & OpRef::Undef) {
1178 MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index);
1179 Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1180 continue;
1181 }
1182 // R is an index of a result.
1183 unsigned Part = R.OpN & OpRef::Whole;
1184 int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
1185 if (Idx < 0)
1186 Idx += I;
1187 assert(Idx >= 0 && unsigned(Idx) < Output.size());
1188 SDValue Op = Output[Idx];
1189 MVT OpTy = Op.getValueType().getSimpleVT();
1190 if (Part != OpRef::Whole) {
1191 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1193 OpTy.getVectorNumElements()/2);
1194 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1195 : Hexagon::vsub_hi;
1196 Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
1197 }
1198 Ops.push_back(Op);
1199 } // for (Node : Results)
1200
1201 assert(Node.Ty != MVT::Other);
1202 SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1203 ? Ops.front().getNode()
1204 : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1205 Output.push_back(SDValue(ResN, 0));
1206 }
1207
1208 SDNode *OutN = Output.back().getNode();
1209 SDNode *InpN = Results.InpNode;
1210 DEBUG_WITH_TYPE("isel", {
1211 dbgs() << "Generated node:\n";
1212 OutN->dumpr(&DAG);
1213 });
1214
1215 ISel.ReplaceNode(InpN, OutN);
1216 selectVectorConstants(OutN);
1218}
1219
1220OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
1221 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1222 const SDLoc &dl(Results.InpNode);
1223 Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1224 getConst32(Hexagon::HvxWRRegClassID, dl),
1225 Lo, getConst32(Hexagon::vsub_lo, dl),
1226 Hi, getConst32(Hexagon::vsub_hi, dl),
1227 });
1228 return OpRef::res(Results.top());
1229}
1230
1231OpRef HvxSelector::funnels(OpRef Va, OpRef Vb, int Amount,
1232 ResultStack &Results) {
1233 // Do a funnel shift towards the low end (shift right) by Amount bytes.
1234 // If Amount < 0, treat it as shift left, i.e. do a shift right by
1235 // Amount + HwLen.
1236 auto VecLen = static_cast<int>(HwLen);
1237
1238 if (Amount == 0)
1239 return Va;
1240 if (Amount == VecLen)
1241 return Vb;
1242
1243 MVT Ty = getSingleVT(MVT::i8);
1244 const SDLoc &dl(Results.InpNode);
1245
1246 if (Amount < 0)
1247 Amount += VecLen;
1248 if (Amount > VecLen) {
1249 Amount -= VecLen;
1250 std::swap(Va, Vb);
1251 }
1252
1253 if (isUInt<3>(Amount)) {
1254 SDValue A = getConst32(Amount, dl);
1255 Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, A});
1256 } else if (isUInt<3>(VecLen - Amount)) {
1257 SDValue A = getConst32(VecLen - Amount, dl);
1258 Results.push(Hexagon::V6_vlalignbi, Ty, {Vb, Va, A});
1259 } else {
1260 SDValue A = getConst32(Amount, dl);
1261 Results.push(Hexagon::A2_tfrsi, Ty, {A});
1262 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)});
1263 }
1264 return OpRef::res(Results.top());
1265}
1266
1267// Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1268// pack these halves into a single vector, and remap SM into NewMask to use
1269// the new vector instead.
1270OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1271 ResultStack &Results, MutableArrayRef<int> NewMask,
1272 unsigned Options) {
1273 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1274 if (!Va.isValid() || !Vb.isValid())
1275 return OpRef::fail();
1276
1277 if (Vb.isUndef()) {
1278 std::copy(SM.Mask.begin(), SM.Mask.end(), NewMask.begin());
1279 return Va;
1280 }
1281 if (Va.isUndef()) {
1282 std::copy(SM.Mask.begin(), SM.Mask.end(), NewMask.begin());
1284 return Vb;
1285 }
1286
1287 MVT Ty = getSingleVT(MVT::i8);
1288 MVT PairTy = getPairVT(MVT::i8);
1289 OpRef Inp[2] = {Va, Vb};
1290 unsigned VecLen = SM.Mask.size();
1291
1292 auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1293 ResultStack &Results) {
1294 if (Amt == 0)
1295 return Lo;
1296 const SDLoc &dl(Results.InpNode);
1297 if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1298 bool IsRight = isUInt<3>(Amt); // Right align.
1299 SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1300 unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1301 Results.push(Opc, Ty, {Hi, Lo, S});
1302 return OpRef::res(Results.top());
1303 }
1304 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1305 OpRef A = OpRef::res(Results.top());
1306 Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1307 return OpRef::res(Results.top());
1308 };
1309
1310 // Segment is a vector half.
1311 unsigned SegLen = HwLen / 2;
1312
1313 // Check if we can shuffle vector halves around to get the used elements
1314 // into a single vector.
1315 shuffles::MaskT MaskH(SM.Mask);
1316 SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1317 unsigned SegCount = SegList.size();
1318 SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1319
1320 if (SegList.empty())
1321 return OpRef::undef(Ty);
1322
1323 // NOTE:
1324 // In the following part of the function, where the segments are rearranged,
1325 // the shuffle mask SM can be of any length that is a multiple of a vector
1326 // (i.e. a multiple of 2*SegLen), and non-zero.
1327 // The output segment map is computed, and it may have any even number of
1328 // entries, but the rearrangement of input segments will be done based only
1329 // on the first two (non-undef) entries in the segment map.
1330 // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1331 // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1332 // a single vector V = 3:1. The output mask will then be updated to use
1333 // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1334 //
1335 // Picking the segments based on the output map is an optimization. For
1336 // correctness it is only necessary that Seg0 and Seg1 are the two input
1337 // segments that are used in the output.
1338
1339 unsigned Seg0 = ~0u, Seg1 = ~0u;
1340 for (int I = 0, E = SegMap.size(); I != E; ++I) {
1341 unsigned X = SegMap[I];
1342 if (X == ~0u)
1343 continue;
1344 if (Seg0 == ~0u)
1345 Seg0 = X;
1346 else if (Seg1 != ~0u)
1347 break;
1348 if (X == ~1u || X != Seg0)
1349 Seg1 = X;
1350 }
1351
1352 if (SegCount == 1) {
1353 unsigned SrcOp = SegList[0] / 2;
1354 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1355 int M = SM.Mask[I];
1356 if (M >= 0) {
1357 M -= SrcOp * HwLen;
1358 assert(M >= 0);
1359 }
1360 NewMask[I] = M;
1361 }
1362 return Inp[SrcOp];
1363 }
1364
1365 if (SegCount == 2) {
1366 // Seg0 should not be undef here: this would imply a SegList
1367 // with <= 1 elements, which was checked earlier.
1368 assert(Seg0 != ~0u);
1369
1370 // If Seg0 or Seg1 are "multi-defined", pick them from the input
1371 // segment list instead.
1372 if (Seg0 == ~1u || Seg1 == ~1u) {
1373 if (Seg0 == Seg1) {
1374 Seg0 = SegList[0];
1375 Seg1 = SegList[1];
1376 } else if (Seg0 == ~1u) {
1377 Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1378 } else {
1379 assert(Seg1 == ~1u);
1380 Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1381 }
1382 }
1383 assert(Seg0 != ~1u && Seg1 != ~1u);
1384
1385 assert(Seg0 != Seg1 && "Expecting different segments");
1386 const SDLoc &dl(Results.InpNode);
1387 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1388 OpRef HL = OpRef::res(Results.top());
1389
1390 // Va = AB, Vb = CD
1391
1392 if (Seg0 / 2 == Seg1 / 2) {
1393 // Same input vector.
1394 Va = Inp[Seg0 / 2];
1395 if (Seg0 > Seg1) {
1396 // Swap halves.
1397 Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1398 Va = OpRef::res(Results.top());
1399 }
1400 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1401 } else if (Seg0 % 2 == Seg1 % 2) {
1402 // Picking AC, BD, CA, or DB.
1403 // vshuff(CD,AB,HL) -> BD:AC
1404 // vshuff(AB,CD,HL) -> DB:CA
1405 auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
1406 : std::make_pair(Va, Vb); // CA or DB
1407 Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1408 OpRef P = OpRef::res(Results.top());
1409 Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1410 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1411 } else {
1412 // Picking AD, BC, CB, or DA.
1413 if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1414 // AD or BC: this can be done using vmux.
1415 // Q = V6_pred_scalar2 SegLen
1416 // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1417 Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1418 OpRef Qt = OpRef::res(Results.top());
1419 auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
1420 : std::make_pair(Vb, Va); // CB
1421 Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1422 Va = OpRef::res(Results.top());
1423 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1424 } else {
1425 // BC or DA: this could be done via valign by SegLen.
1426 // Do nothing here, because valign (if possible) will be generated
1427 // later on (make sure the Seg0 values are as expected).
1428 assert(Seg0 == 1 || Seg0 == 3);
1429 }
1430 }
1431 }
1432
1433 // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1434 // FIXME: maybe remove this?
1435 ShuffleMask SMH(MaskH);
1436 assert(SMH.Mask.size() == VecLen);
1437 shuffles::MaskT MaskA(SMH.Mask);
1438
1439 if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1440 // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1441 shuffles::MaskT Swapped(SMH.Mask);
1443 ShuffleMask SW(Swapped);
1444 if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1445 MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1446 std::swap(Va, Vb);
1447 }
1448 }
1449 ShuffleMask SMA(MaskA);
1450 assert(SMA.Mask.size() == VecLen);
1451
1452 if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1453 int ShiftR = SMA.MinSrc;
1454 if (ShiftR >= static_cast<int>(HwLen)) {
1455 Va = Vb;
1456 Vb = OpRef::undef(Ty);
1457 ShiftR -= HwLen;
1458 }
1459 OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1460
1461 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1462 int M = SMA.Mask[I];
1463 if (M != -1)
1464 M -= SMA.MinSrc;
1465 NewMask[I] = M;
1466 }
1467 return RetVal;
1468 }
1469
1470 // By here, packing by segment (half-vector) shuffling, and vector alignment
1471 // failed. Try vmux.
1472 // Note: since this is using the original mask, Va and Vb must not have been
1473 // modified.
1474
1475 if (Options & PackMux) {
1476 // If elements picked from Va and Vb have all different (source) indexes
1477 // (relative to the start of the argument), do a mux, and update the mask.
1478 BitVector Picked(HwLen);
1480 bool CanMux = true;
1481 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1482 int M = SM.Mask[I];
1483 if (M == -1)
1484 continue;
1485 if (M >= static_cast<int>(HwLen))
1486 M -= HwLen;
1487 else
1488 MuxBytes[M] = 0xFF;
1489 if (Picked[M]) {
1490 CanMux = false;
1491 break;
1492 }
1493 NewMask[I] = M;
1494 }
1495 if (CanMux)
1496 return vmuxs(MuxBytes, Va, Vb, Results);
1497 }
1498 return OpRef::fail();
1499}
1500
1501// Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1502// pack these vectors into a pair, and remap SM into NewMask to use the
1503// new pair instead.
1504OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1505 ResultStack &Results, MutableArrayRef<int> NewMask) {
1506 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1508 if (SegList.empty())
1509 return OpRef::undef(getPairVT(MVT::i8));
1510
1511 // If more than two halves are used, bail.
1512 // TODO: be more aggressive here?
1513 unsigned SegCount = SegList.size();
1514 if (SegCount > 2)
1515 return OpRef::fail();
1516
1517 MVT HalfTy = getSingleVT(MVT::i8);
1518
1519 OpRef Inp[2] = { Va, Vb };
1520 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1521
1522 // Really make sure we have at most 2 vectors used in the mask.
1523 assert(SegCount <= 2);
1524
1525 for (int I = 0, E = SegList.size(); I != E; ++I) {
1526 unsigned S = SegList[I];
1527 OpRef Op = Inp[S / 2];
1528 Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1529 }
1530
1531 // NOTE: Using SegList as the packing map here (not SegMap). This works,
1532 // because we're not concerned here about the order of the segments (i.e.
1533 // single vectors) in the output pair. Changing the order of vectors is
1534 // free (as opposed to changing the order of vector halves as in packs),
1535 // and so there is no extra cost added in case the order needs to be
1536 // changed later.
1537 packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1538 return concats(Out[0], Out[1], Results);
1539}
1540
1541OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1542 ResultStack &Results) {
1543 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1544 MVT ByteTy = getSingleVT(MVT::i8);
1545 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1546 const SDLoc &dl(Results.InpNode);
1547 SDValue B = getVectorConstant(Bytes, dl);
1548 Results.push(Hexagon::V6_vd0, ByteTy, {});
1549 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1550 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1551 return OpRef::res(Results.top());
1552}
1553
1554OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1555 ResultStack &Results) {
1556 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1557 size_t S = Bytes.size() / 2;
1558 OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1559 OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1560 return concats(L, H, Results);
1561}
1562
1563OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1564 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1565 unsigned VecLen = SM.Mask.size();
1566 assert(HwLen == VecLen);
1567 (void)VecLen;
1568 assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1569
1570 if (isIdentity(SM.Mask))
1571 return Va;
1572 if (isUndef(SM.Mask))
1573 return OpRef::undef(getSingleVT(MVT::i8));
1574
1575 // First, check for rotations.
1576 if (auto Dist = rotationDistance(SM, VecLen)) {
1577 OpRef Rotate = funnels(Va, Va, *Dist, Results);
1578 if (Rotate.isValid())
1579 return Rotate;
1580 }
1581 unsigned HalfLen = HwLen / 2;
1582 assert(isPowerOf2_32(HalfLen));
1583
1584 // Handle special case where the output is the same half of the input
1585 // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1586 std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1587 if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1588 std::pair<int, unsigned> Strip2 =
1589 findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1590 if (Strip1 == Strip2) {
1591 const SDLoc &dl(Results.InpNode);
1592 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1593 Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1594 {Va, Va, OpRef::res(Results.top())});
1595 OpRef S = OpRef::res(Results.top());
1596 return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1597 }
1598 }
1599
1600 OpRef P = perfect(SM, Va, Results);
1601 if (P.isValid())
1602 return P;
1603 return butterfly(SM, Va, Results);
1604}
1605
1606OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1607 ResultStack &Results) {
1608 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1609 if (isUndef(SM.Mask))
1610 return OpRef::undef(getSingleVT(MVT::i8));
1611
1612 OpRef C = contracting(SM, Va, Vb, Results);
1613 if (C.isValid())
1614 return C;
1615
1616 int VecLen = SM.Mask.size();
1617 shuffles::MaskT PackedMask(VecLen);
1618 OpRef P = packs(SM, Va, Vb, Results, PackedMask);
1619 if (P.isValid())
1620 return shuffs1(ShuffleMask(PackedMask), P, Results);
1621
1622 // TODO: Before we split the mask, try perfect shuffle on concatenated
1623 // operands.
1624
1625 shuffles::MaskT MaskL(VecLen), MaskR(VecLen);
1626 splitMask(SM.Mask, MaskL, MaskR);
1627
1628 OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1629 OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1630 if (!L.isValid() || !R.isValid())
1631 return OpRef::fail();
1632
1633 SmallVector<uint8_t, 128> Bytes(VecLen);
1634 for (int I = 0; I != VecLen; ++I) {
1635 if (MaskL[I] != -1)
1636 Bytes[I] = 0xFF;
1637 }
1638 return vmuxs(Bytes, L, R, Results);
1639}
1640
1641OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1642 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1643 int VecLen = SM.Mask.size();
1644
1645 if (isIdentity(SM.Mask))
1646 return Va;
1647 if (isUndef(SM.Mask))
1648 return OpRef::undef(getPairVT(MVT::i8));
1649
1650 shuffles::MaskT PackedMask(VecLen);
1651 OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1652 if (P.isValid()) {
1653 ShuffleMask PM(PackedMask);
1654 OpRef E = expanding(PM, P, Results);
1655 if (E.isValid())
1656 return E;
1657
1658 OpRef L = shuffs1(PM.lo(), P, Results);
1659 OpRef H = shuffs1(PM.hi(), P, Results);
1660 if (L.isValid() && H.isValid())
1661 return concats(L, H, Results);
1662 }
1663
1664 if (!isLowHalfOnly(SM.Mask)) {
1665 // Doing a perfect shuffle on a low-half mask (i.e. where the upper half
1666 // is all-undef) may produce a perfect shuffle that generates legitimate
1667 // upper half. This isn't wrong, but if the perfect shuffle was possible,
1668 // then there is a good chance that a shorter (contracting) code may be
1669 // used as well (e.g. V6_vshuffeb, etc).
1670 OpRef R = perfect(SM, Va, Results);
1671 if (R.isValid())
1672 return R;
1673 // TODO commute the mask and try the opposite order of the halves.
1674 }
1675
1676 OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1677 OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1678 if (L.isValid() && H.isValid())
1679 return concats(L, H, Results);
1680
1681 return OpRef::fail();
1682}
1683
1684OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1685 ResultStack &Results) {
1686 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1687 if (isUndef(SM.Mask))
1688 return OpRef::undef(getPairVT(MVT::i8));
1689
1690 int VecLen = SM.Mask.size();
1691 SmallVector<int,256> PackedMask(VecLen);
1692 OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1693 if (P.isValid())
1694 return shuffp1(ShuffleMask(PackedMask), P, Results);
1695
1696 SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1697 splitMask(SM.Mask, MaskL, MaskR);
1698
1699 OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1700 OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1701 if (!L.isValid() || !R.isValid())
1702 return OpRef::fail();
1703
1704 // Mux the results.
1705 SmallVector<uint8_t,256> Bytes(VecLen);
1706 for (int I = 0; I != VecLen; ++I) {
1707 if (MaskL[I] != -1)
1708 Bytes[I] = 0xFF;
1709 }
1710 return vmuxp(Bytes, L, R, Results);
1711}
1712
1713namespace {
1714 struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1715 template <typename T>
1716 Deleter(SelectionDAG &D, T &C)
1717 : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1718 C.erase(N);
1719 }) {}
1720 };
1721
1722 template <typename T>
1723 struct NullifyingVector : public T {
1725 NullifyingVector(T &&V) : T(V) {
1726 for (unsigned i = 0, e = T::size(); i != e; ++i) {
1727 SDNode *&N = T::operator[](i);
1728 Refs[N] = &N;
1729 }
1730 }
1731 void erase(SDNode *N) {
1732 auto F = Refs.find(N);
1733 if (F != Refs.end())
1734 *F->second = nullptr;
1735 }
1736 };
1737}
1738
1739void HvxSelector::select(SDNode *ISelN) {
1740 // What's important here is to select the right set of nodes. The main
1741 // selection algorithm loops over nodes in a topological order, i.e. users
1742 // are visited before their operands.
1743 //
1744 // It is an error to have an unselected node with a selected operand, and
1745 // there is an assertion in the main selector code to enforce that.
1746 //
1747 // Such a situation could occur if we selected a node, which is both a
1748 // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1749 // node in the DAG.
1750 assert(ISelN->getOpcode() == HexagonISD::ISEL);
1751 SDNode *N0 = ISelN->getOperand(0).getNode();
1752
1753 // There could have been nodes created (i.e. inserted into the DAG)
1754 // that are now dead. Remove them, in case they use any of the nodes
1755 // to select (and make them look shared).
1757
1758 SetVector<SDNode *> SubNodes;
1759
1760 if (!N0->isMachineOpcode()) {
1761 // Don't want to select N0 if it's shared with another node, except if
1762 // it's shared with other ISELs.
1763 auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1764 if (llvm::all_of(N0->uses(), IsISelN))
1765 SubNodes.insert(N0);
1766 }
1767 if (SubNodes.empty()) {
1768 ISel.ReplaceNode(ISelN, N0);
1769 return;
1770 }
1771
1772 // Need to manually select the nodes that are dominated by the ISEL. Other
1773 // nodes are reachable from the rest of the DAG, and so will be selected
1774 // by the DAG selection routine.
1775 SetVector<SDNode*> Dom, NonDom;
1776 Dom.insert(N0);
1777
1778 auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool {
1779 if (Dom.count(T))
1780 return true;
1781 if (T->use_empty() || NonDom.count(T))
1782 return false;
1783 for (SDNode *U : T->uses()) {
1784 // If T is reachable from a known non-dominated node, then T itself
1785 // is non-dominated.
1786 if (!Rec(U, Rec)) {
1787 NonDom.insert(T);
1788 return false;
1789 }
1790 }
1791 Dom.insert(T);
1792 return true;
1793 };
1794
1795 auto IsDom = [&IsDomRec] (SDNode *T) { return IsDomRec(T, IsDomRec); };
1796
1797 // Add the rest of nodes dominated by ISEL to SubNodes.
1798 for (unsigned I = 0; I != SubNodes.size(); ++I) {
1799 for (SDValue Op : SubNodes[I]->ops()) {
1800 SDNode *O = Op.getNode();
1801 if (IsDom(O))
1802 SubNodes.insert(O);
1803 }
1804 }
1805
1806 // Do a topological sort of nodes from Dom.
1807 SetVector<SDNode*> TmpQ;
1808
1809 std::map<SDNode *, unsigned> OpCount;
1810 for (SDNode *T : Dom) {
1811 unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) {
1812 return Dom.count(U.getNode());
1813 });
1814
1815 OpCount.insert({T, NumDomOps});
1816 if (NumDomOps == 0)
1817 TmpQ.insert(T);
1818 }
1819
1820 for (unsigned I = 0; I != TmpQ.size(); ++I) {
1821 SDNode *S = TmpQ[I];
1822 for (SDNode *U : S->uses()) {
1823 if (U == ISelN)
1824 continue;
1825 auto F = OpCount.find(U);
1826 assert(F != OpCount.end());
1827 if (F->second > 0 && !--F->second)
1828 TmpQ.insert(F->first);
1829 }
1830 }
1831
1832 // Remove the marker.
1833 ISel.ReplaceNode(ISelN, N0);
1834
1835 assert(SubNodes.size() == TmpQ.size());
1836 NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1837
1838 Deleter DUQ(DAG, Queue);
1839 for (SDNode *S : reverse(Queue)) {
1840 if (S == nullptr)
1841 continue;
1842 DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1843 ISel.Select(S);
1844 }
1845}
1846
1847bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1848 MVT ResTy, SDValue Va, SDValue Vb,
1849 SDNode *N) {
1850 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1851 MVT ElemTy = ResTy.getVectorElementType();
1852 assert(ElemTy == MVT::i8);
1853 unsigned VecLen = Mask.size();
1854 bool HavePairs = (2*HwLen == VecLen);
1855 MVT SingleTy = getSingleVT(MVT::i8);
1856
1857 // The prior attempts to handle this shuffle may have left a bunch of
1858 // dead nodes in the DAG (such as constants). These nodes will be added
1859 // at the end of DAG's node list, which at that point had already been
1860 // sorted topologically. In the main selection loop, the node list is
1861 // traversed backwards from the root node, which means that any new
1862 // nodes (from the end of the list) will not be visited.
1863 // Scalarization will replace the shuffle node with the scalarized
1864 // expression, and if that expression reused any if the leftoever (dead)
1865 // nodes, these nodes would not be selected (since the "local" selection
1866 // only visits nodes that are not in AllNodes).
1867 // To avoid this issue, remove all dead nodes from the DAG now.
1868// DAG.RemoveDeadNodes();
1869
1871 LLVMContext &Ctx = *DAG.getContext();
1872 MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1873 for (int I : Mask) {
1874 if (I < 0) {
1875 Ops.push_back(ISel.selectUndef(dl, LegalTy));
1876 continue;
1877 }
1878 SDValue Vec;
1879 unsigned M = I;
1880 if (M < VecLen) {
1881 Vec = Va;
1882 } else {
1883 Vec = Vb;
1884 M -= VecLen;
1885 }
1886 if (HavePairs) {
1887 if (M < HwLen) {
1888 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1889 } else {
1890 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1891 M -= HwLen;
1892 }
1893 }
1894 SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1895 SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1897 assert(L.getNode());
1898 Ops.push_back(L);
1899 }
1900
1901 SDValue LV;
1902 if (2*HwLen == VecLen) {
1903 SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1904 SDValue L0 = Lower.LowerOperation(B0, DAG);
1905 SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1906 SDValue L1 = Lower.LowerOperation(B1, DAG);
1907 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1908 // functions may expect to be called only for illegal operations, so
1909 // make sure that they are not called for legal ones. Develop a better
1910 // mechanism for dealing with this.
1911 LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1912 } else {
1913 SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1914 LV = Lower.LowerOperation(BV, DAG);
1915 }
1916
1917 assert(!N->use_empty());
1918 SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1919 ISel.ReplaceNode(N, IS.getNode());
1920 select(IS.getNode());
1922 return true;
1923}
1924
1926 unsigned Width) {
1927 auto possibilities = [](ArrayRef<uint8_t> Bs, unsigned Width) -> uint32_t {
1928 unsigned Impossible = ~(1u << Width) + 1;
1929 for (unsigned I = 0, E = Bs.size(); I != E; ++I) {
1930 uint8_t B = Bs[I];
1931 if (B == 0xff)
1932 continue;
1933 if (~Impossible == 0)
1934 break;
1935 for (unsigned Log = 0; Log != Width; ++Log) {
1936 if (Impossible & (1u << Log))
1937 continue;
1938 unsigned Expected = (I >> Log) % 2;
1939 if (B != Expected)
1940 Impossible |= (1u << Log);
1941 }
1942 }
1943 return ~Impossible;
1944 };
1945
1946 SmallVector<uint32_t, 8> Worklist(Width);
1947
1948 for (unsigned BitIdx = 0; BitIdx != Width; ++BitIdx) {
1949 SmallVector<uint8_t> BitValues(SM.Mask.size());
1950 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
1951 int M = SM.Mask[i];
1952 if (M < 0)
1953 BitValues[i] = 0xff;
1954 else
1955 BitValues[i] = (M & (1u << BitIdx)) != 0;
1956 }
1957 Worklist[BitIdx] = possibilities(BitValues, Width);
1958 }
1959
1960 // If there is a word P in Worklist that matches multiple possibilities,
1961 // then if any other word Q matches any of the possibilities matched by P,
1962 // then Q matches all the possibilities matched by P. In fact, P == Q.
1963 // In other words, for each words P, Q, the sets of possibilities matched
1964 // by P and Q are either equal or disjoint (no partial overlap).
1965 //
1966 // Illustration: For 4-bit values there are 4 complete sequences:
1967 // a: 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
1968 // b: 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
1969 // c: 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1
1970 // d: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
1971 //
1972 // Words containing unknown bits that match two of the complete
1973 // sequences:
1974 // ab: 0 u u 1 0 u u 1 0 u u 1 0 u u 1
1975 // ac: 0 u 0 u u 1 u 1 0 u 0 u u 1 u 1
1976 // ad: 0 u 0 u 0 u 0 u u 1 u 1 u 1 u 1
1977 // bc: 0 0 u u u u 1 1 0 0 u u u u 1 1
1978 // bd: 0 0 u u 0 0 u u u u 1 1 u u 1 1
1979 // cd: 0 0 0 0 u u u u u u u u 1 1 1 1
1980 //
1981 // Proof of the claim above:
1982 // Let P be a word that matches s0 and s1. For that to happen, all known
1983 // bits in P must match s0 and s1 exactly.
1984 // Assume there is Q that matches s1. Note that since P and Q came from
1985 // the same shuffle mask, the positions of unknown bits in P and Q match
1986 // exactly, which makes the indices of known bits be exactly the same
1987 // between P and Q. Since P matches s0 and s1, the known bits of P much
1988 // match both s0 and s1. Also, since Q matches s1, the known bits in Q
1989 // are exactly the same as in s1, which means that they are exactly the
1990 // same as in P. This implies that P == Q.
1991
1992 // There can be a situation where there are more entries with the same
1993 // bits set than there are set bits (e.g. value 9 occuring more than 2
1994 // times). In such cases it will be impossible to complete this to a
1995 // perfect shuffle.
1996 SmallVector<uint32_t, 8> Sorted(Worklist);
1997 llvm::sort(Sorted.begin(), Sorted.end());
1998
1999 for (unsigned I = 0, E = Sorted.size(); I != E;) {
2000 unsigned P = Sorted[I], Count = 1;
2001 while (++I != E && P == Sorted[I])
2002 ++Count;
2003 if ((unsigned)llvm::popcount(P) < Count) {
2004 // Reset all occurences of P, if there are more occurrences of P
2005 // than there are bits in P.
2006 for (unsigned &Q : Worklist) {
2007 if (Q == P)
2008 Q = 0;
2009 }
2010 }
2011 }
2012
2013 return Worklist;
2014}
2015
2018 // Pick a completion if there are multiple possibilities. For now just
2019 // select any valid completion.
2020 SmallVector<uint32_t, 8> Comps(Completions);
2021
2022 for (unsigned I = 0; I != Width; ++I) {
2023 uint32_t P = Comps[I];
2024 assert(P != 0);
2025 if (isPowerOf2_32(P))
2026 continue;
2027 // T = least significant bit of P.
2028 uint32_t T = P ^ ((P - 1) & P);
2029 // Clear T in all remaining words matching P.
2030 for (unsigned J = I + 1; J != Width; ++J) {
2031 if (Comps[J] == P)
2032 Comps[J] ^= T;
2033 }
2034 Comps[I] = T;
2035 }
2036
2037#ifndef NDEBUG
2038 // Check that we have generated a valid completion.
2039 uint32_t OrAll = 0;
2040 for (unsigned I = 0, E = Comps.size(); I != E; ++I) {
2041 uint32_t C = Comps[I];
2043 OrAll |= C;
2044 }
2045 assert(OrAll == (1u << Width) -1);
2046#endif
2047
2048 return Comps;
2049}
2050
2051std::optional<int> HvxSelector::rotationDistance(ShuffleMask SM,
2052 unsigned WrapAt) {
2053 std::optional<int> Dist;
2054 for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
2055 int M = SM.Mask[I];
2056 if (M < 0)
2057 continue;
2058 if (Dist) {
2059 if ((I + *Dist) % static_cast<int>(WrapAt) != M)
2060 return std::nullopt;
2061 } else {
2062 // Integer a%b operator assumes rounding towards zero by /, so it
2063 // "misbehaves" when a crosses 0 (the remainder also changes sign).
2064 // Add WrapAt in an attempt to keep I+Dist non-negative.
2065 Dist = M - I;
2066 if (Dist < 0)
2067 Dist = *Dist + WrapAt;
2068 }
2069 }
2070 return Dist;
2071}
2072
2073OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
2074 ResultStack &Results) {
2075 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2076 if (!Va.isValid() || !Vb.isValid())
2077 return OpRef::fail();
2078
2079 // Contracting shuffles, i.e. instructions that always discard some bytes
2080 // from the operand vectors.
2081 //
2082 // Funnel shifts
2083 // V6_vshuff{e,o}b
2084 // V6_vshuf{e,o}h
2085 // V6_vdealb4w
2086 // V6_vpack{e,o}{b,h}
2087
2088 int VecLen = SM.Mask.size();
2089
2090 // First, check for funnel shifts.
2091 if (auto Dist = rotationDistance(SM, 2 * VecLen)) {
2092 OpRef Funnel = funnels(Va, Vb, *Dist, Results);
2093 if (Funnel.isValid())
2094 return Funnel;
2095 }
2096
2097 MVT SingleTy = getSingleVT(MVT::i8);
2098 MVT PairTy = getPairVT(MVT::i8);
2099
2100 auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) -> bool {
2101 return Mask1 == Mask2;
2102 };
2103
2104 using PackConfig = std::pair<unsigned, bool>;
2105 PackConfig Packs[] = {
2106 {1, false}, // byte, even
2107 {1, true}, // byte, odd
2108 {2, false}, // half, even
2109 {2, true}, // half, odd
2110 };
2111
2112 { // Check vpack
2113 unsigned Opcodes[] = {
2114 Hexagon::V6_vpackeb,
2115 Hexagon::V6_vpackob,
2116 Hexagon::V6_vpackeh,
2117 Hexagon::V6_vpackoh,
2118 };
2119 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2120 auto [Size, Odd] = Packs[i];
2121 if (same(SM.Mask, shuffles::mask(shuffles::vpack, HwLen, Size, Odd))) {
2122 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2123 return OpRef::res(Results.top());
2124 }
2125 }
2126 }
2127
2128 { // Check vshuff
2129 unsigned Opcodes[] = {
2130 Hexagon::V6_vshuffeb,
2131 Hexagon::V6_vshuffob,
2132 Hexagon::V6_vshufeh,
2133 Hexagon::V6_vshufoh,
2134 };
2135 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2136 auto [Size, Odd] = Packs[i];
2137 if (same(SM.Mask, shuffles::mask(shuffles::vshuff, HwLen, Size, Odd))) {
2138 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2139 return OpRef::res(Results.top());
2140 }
2141 }
2142 }
2143
2144 { // Check vdeal
2145 // There is no "V6_vdealeb", etc, but the supposed behavior of vdealeb
2146 // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such
2147 // variants of "deal" can be done similarly.
2148 unsigned Opcodes[] = {
2149 Hexagon::V6_vpackeb,
2150 Hexagon::V6_vpackob,
2151 Hexagon::V6_vpackeh,
2152 Hexagon::V6_vpackoh,
2153 };
2154 const SDLoc &dl(Results.InpNode);
2155
2156 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2157 auto [Size, Odd] = Packs[i];
2158 if (same(SM.Mask, shuffles::mask(shuffles::vdeal, HwLen, Size, Odd))) {
2159 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-2 * Size, dl)});
2160 Results.push(Hexagon::V6_vdealvdd, PairTy, {Vb, Va, OpRef::res(-1)});
2161 auto vdeal = OpRef::res(Results.top());
2162 Results.push(Opcodes[i], SingleTy,
2163 {OpRef::hi(vdeal), OpRef::lo(vdeal)});
2164 return OpRef::res(Results.top());
2165 }
2166 }
2167 }
2168
2169 if (same(SM.Mask, shuffles::mask(shuffles::vdealb4w, HwLen))) {
2170 Results.push(Hexagon::V6_vdealb4w, SingleTy, {Vb, Va});
2171 return OpRef::res(Results.top());
2172 }
2173
2174 return OpRef::fail();
2175}
2176
2177OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2178 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2179 // Expanding shuffles (using all elements and inserting into larger vector):
2180 //
2181 // V6_vunpacku{b,h} [*]
2182 //
2183 // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
2184 //
2185 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
2186 // they are not shuffles.
2187 //
2188 // The argument is a single vector.
2189
2190 int VecLen = SM.Mask.size();
2191 assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
2192
2193 std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
2194
2195 // The patterns for the unpacks, in terms of the starting offsets of the
2196 // consecutive strips (L = length of the strip, N = VecLen):
2197 //
2198 // vunpacku: 0, -1, L, -1, 2L, -1 ...
2199
2200 if (Strip.first != 0)
2201 return OpRef::fail();
2202
2203 // The vunpackus only handle byte and half-word.
2204 if (Strip.second != 1 && Strip.second != 2)
2205 return OpRef::fail();
2206
2207 int N = VecLen;
2208 int L = Strip.second;
2209
2210 // First, check the non-ignored strips.
2211 for (int I = 2*L; I < N; I += 2*L) {
2212 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
2213 if (S.second != unsigned(L))
2214 return OpRef::fail();
2215 if (2*S.first != I)
2216 return OpRef::fail();
2217 }
2218 // Check the -1s.
2219 for (int I = L; I < N; I += 2*L) {
2220 auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
2221 if (S.first != -1 || S.second != unsigned(L))
2222 return OpRef::fail();
2223 }
2224
2225 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
2226 : Hexagon::V6_vunpackuh;
2227 Results.push(Opc, getPairVT(MVT::i8), {Va});
2228 return OpRef::res(Results.top());
2229}
2230
2231OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2232 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2233 // V6_vdeal{b,h}
2234 // V6_vshuff{b,h}
2235
2236 // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
2237 // V6_vshuffvdd (V6_vshuff)
2238 // V6_dealvdd (V6_vdeal)
2239
2240 int VecLen = SM.Mask.size();
2241 assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
2242 unsigned LogLen = Log2_32(VecLen);
2243 unsigned HwLog = Log2_32(HwLen);
2244 // The result length must be the same as the length of a single vector,
2245 // or a vector pair.
2246 assert(LogLen == HwLog || LogLen == HwLog + 1);
2247 bool HavePairs = LogLen == HwLog + 1;
2248
2249 SmallVector<unsigned, 8> Perm(LogLen);
2250
2251 // Check if this could be a perfect shuffle, or a combination of perfect
2252 // shuffles.
2253 //
2254 // Consider this permutation (using hex digits to make the ASCII diagrams
2255 // easier to read):
2256 // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
2257 // This is a "deal" operation: divide the input into two halves, and
2258 // create the output by picking elements by alternating between these two
2259 // halves:
2260 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
2261 // 8 9 A B C D E F
2262 //
2263 // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
2264 // a somwehat different mechanism that could be used to perform shuffle/
2265 // deal operations: a 2x2 transpose.
2266 // Consider the halves of inputs again, they can be interpreted as a 2x8
2267 // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
2268 // together. Now, when considering 2 elements at a time, it will be a 2x4
2269 // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
2270 // 01 23 45 67
2271 // 89 AB CD EF
2272 // With groups of 4, this will become a single 2x2 matrix, and so on.
2273 //
2274 // The 2x2 transpose instruction works by transposing each of the 2x2
2275 // matrices (or "sub-matrices"), given a specific group size. For example,
2276 // if the group size is 1 (i.e. each element is its own group), there
2277 // will be four transposes of the four 2x2 matrices that form the 2x8.
2278 // For example, with the inputs as above, the result will be:
2279 // 0 8 2 A 4 C 6 E
2280 // 1 9 3 B 5 D 7 F
2281 // Now, this result can be tranposed again, but with the group size of 2:
2282 // 08 19 4C 5D
2283 // 2A 3B 6E 7F
2284 // If we then transpose that result, but with the group size of 4, we get:
2285 // 0819 2A3B
2286 // 4C5D 6E7F
2287 // If we concatenate these two rows, it will be
2288 // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
2289 // which is the same as the "deal" [*] above.
2290 //
2291 // In general, a "deal" of individual elements is a series of 2x2 transposes,
2292 // with changing group size. HVX has two instructions:
2293 // Vdd = V6_vdealvdd Vu, Vv, Rt
2294 // Vdd = V6_shufvdd Vu, Vv, Rt
2295 // that perform exactly that. The register Rt controls which transposes are
2296 // going to happen: a bit at position n (counting from 0) indicates that a
2297 // transpose with a group size of 2^n will take place. If multiple bits are
2298 // set, multiple transposes will happen: vdealvdd will perform them starting
2299 // with the largest group size, vshuffvdd will do them in the reverse order.
2300 //
2301 // The main observation is that each 2x2 transpose corresponds to swapping
2302 // columns of bits in the binary representation of the values.
2303 //
2304 // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
2305 // in a given column. The * denote the columns that will be swapped.
2306 // The transpose with the group size 2^n corresponds to swapping columns
2307 // 3 (the highest log) and log2(n):
2308 //
2309 // 3 2 1 0 0 2 1 3 0 2 3 1
2310 // * * * * * *
2311 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2312 // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
2313 // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
2314 // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
2315 // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
2316 // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
2317 // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
2318 // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
2319 // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
2320 // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
2321 // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
2322 // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
2323 // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
2324 // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
2325 // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
2326 // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
2327
2328 // There is one special case that is not a perfect shuffle, but can be
2329 // turned into one easily: when the shuffle operates on a vector pair,
2330 // but the two vectors in the pair are swapped. The code that identifies
2331 // perfect shuffles will reject it, unless the order is reversed.
2332 shuffles::MaskT MaskStorage(SM.Mask);
2333 bool InvertedPair = false;
2334 if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2335 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2336 int M = SM.Mask[i];
2337 MaskStorage[i] = M >= int(HwLen) ? M - HwLen : M + HwLen;
2338 }
2339 InvertedPair = true;
2340 SM = ShuffleMask(MaskStorage);
2341 }
2342
2343 auto Comps = getPerfectCompletions(SM, LogLen);
2344 if (llvm::is_contained(Comps, 0))
2345 return OpRef::fail();
2346
2347 auto Pick = completeToPerfect(Comps, LogLen);
2348 for (unsigned I = 0; I != LogLen; ++I)
2349 Perm[I] = Log2_32(Pick[I]);
2350
2351 // Once we have Perm, represent it as cycles. Denote the maximum log2
2352 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2353 // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2354 // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2355 // order being from left to right. Any (contiguous) segment where the
2356 // values ai, ai+1...aj are either all increasing or all decreasing,
2357 // can be implemented via a single vshuffvdd/vdealvdd respectively.
2358 //
2359 // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2360 // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2361 // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2362 // can be used to generate a sequence of vshuffvdd/vdealvdd.
2363 //
2364 // Example:
2365 // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2366 // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2367 // (4 0 1)(4 0)(4 2 3)(4 2).
2368 // It can then be expanded into swaps as
2369 // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2370 // and broken up into "increasing" segments as
2371 // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2372 // This is equivalent to
2373 // (4 0 1)(4 0 2 3)(4 2),
2374 // which can be implemented as 3 vshufvdd instructions.
2375
2376 using CycleType = SmallVector<unsigned, 8>;
2377 std::set<CycleType> Cycles;
2378 std::set<unsigned> All;
2379
2380 for (unsigned I : Perm)
2381 All.insert(I);
2382
2383 // If the cycle contains LogLen-1, move it to the front of the cycle.
2384 // Otherwise, return the cycle unchanged.
2385 auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2386 unsigned LogPos, N = C.size();
2387 for (LogPos = 0; LogPos != N; ++LogPos)
2388 if (C[LogPos] == LogLen - 1)
2389 break;
2390 if (LogPos == N)
2391 return C;
2392
2393 CycleType NewC(C.begin() + LogPos, C.end());
2394 NewC.append(C.begin(), C.begin() + LogPos);
2395 return NewC;
2396 };
2397
2398 auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
2399 // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2400 // for bytes zero is included, for halfwords is not.
2401 if (Cs.size() != 1)
2402 return 0u;
2403 const CycleType &C = *Cs.begin();
2404 if (C[0] != Len - 1)
2405 return 0u;
2406 int D = Len - C.size();
2407 if (D != 0 && D != 1)
2408 return 0u;
2409
2410 bool IsDeal = true, IsShuff = true;
2411 for (unsigned I = 1; I != Len - D; ++I) {
2412 if (C[I] != Len - 1 - I)
2413 IsDeal = false;
2414 if (C[I] != I - (1 - D)) // I-1, I
2415 IsShuff = false;
2416 }
2417 // At most one, IsDeal or IsShuff, can be non-zero.
2418 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2419 static unsigned Deals[] = {Hexagon::V6_vdealb, Hexagon::V6_vdealh};
2420 static unsigned Shufs[] = {Hexagon::V6_vshuffb, Hexagon::V6_vshuffh};
2421 return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
2422 };
2423
2424 while (!All.empty()) {
2425 unsigned A = *All.begin();
2426 All.erase(A);
2427 CycleType C;
2428 C.push_back(A);
2429 for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
2430 C.push_back(B);
2431 All.erase(B);
2432 }
2433 if (C.size() <= 1)
2434 continue;
2435 Cycles.insert(canonicalize(C));
2436 }
2437
2438 MVT SingleTy = getSingleVT(MVT::i8);
2439 MVT PairTy = getPairVT(MVT::i8);
2440
2441 // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2442 if (unsigned(VecLen) == HwLen) {
2443 if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
2444 Results.push(SingleOpc, SingleTy, {Va});
2445 return OpRef::res(Results.top());
2446 }
2447 }
2448
2449 // From the cycles, construct the sequence of values that will
2450 // then form the control values for vdealvdd/vshuffvdd, i.e.
2451 // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2452 // This essentially strips the M value from the cycles where
2453 // it's present, and performs the insertion of M (then stripping)
2454 // for cycles without M (as described in an earlier comment).
2455 SmallVector<unsigned, 8> SwapElems;
2456 // When the input is extended (i.e. single vector becomes a pair),
2457 // this is done by using an "undef" vector as the second input.
2458 // However, then we get
2459 // input 1: GOODBITS
2460 // input 2: ........
2461 // but we need
2462 // input 1: ....BITS
2463 // input 2: ....GOOD
2464 // Then at the end, this needs to be undone. To accomplish this,
2465 // artificially add "LogLen-1" at both ends of the sequence.
2466 if (!HavePairs)
2467 SwapElems.push_back(LogLen - 1);
2468 for (const CycleType &C : Cycles) {
2469 // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2470 unsigned First = (C[0] == LogLen - 1) ? 1 : 0;
2471 SwapElems.append(C.begin() + First, C.end());
2472 if (First == 0)
2473 SwapElems.push_back(C[0]);
2474 }
2475 if (!HavePairs)
2476 SwapElems.push_back(LogLen - 1);
2477
2478 const SDLoc &dl(Results.InpNode);
2479 OpRef Arg = HavePairs ? Va : concats(Va, OpRef::undef(SingleTy), Results);
2480 if (InvertedPair)
2481 Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
2482
2483 for (unsigned I = 0, E = SwapElems.size(); I != E;) {
2484 bool IsInc = I == E - 1 || SwapElems[I] < SwapElems[I + 1];
2485 unsigned S = (1u << SwapElems[I]);
2486 if (I < E - 1) {
2487 while (++I < E - 1 && IsInc == (SwapElems[I] < SwapElems[I + 1]))
2488 S |= 1u << SwapElems[I];
2489 // The above loop will not add a bit for the final SwapElems[I+1],
2490 // so add it here.
2491 S |= 1u << SwapElems[I];
2492 }
2493 ++I;
2494
2495 NodeTemplate Res;
2496 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2497 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2498 Res.Ty = PairTy;
2499 Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};
2500 Results.push(Res);
2501 Arg = OpRef::res(Results.top());
2502 }
2503
2504 return HavePairs ? Arg : OpRef::lo(Arg);
2505}
2506
2507OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2508 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2509 // Butterfly shuffles.
2510 //
2511 // V6_vdelta
2512 // V6_vrdelta
2513 // V6_vror
2514
2515 // The assumption here is that all elements picked by Mask are in the
2516 // first operand to the vector_shuffle. This assumption is enforced
2517 // by the caller.
2518
2519 MVT ResTy = getSingleVT(MVT::i8);
2520 PermNetwork::Controls FC, RC;
2521 const SDLoc &dl(Results.InpNode);
2522 int VecLen = SM.Mask.size();
2523
2524 for (int M : SM.Mask) {
2525 if (M != -1 && M >= VecLen)
2526 return OpRef::fail();
2527 }
2528
2529 // Try the deltas/benes for both single vectors and vector pairs.
2530 ForwardDeltaNetwork FN(SM.Mask);
2531 if (FN.run(FC)) {
2532 SDValue Ctl = getVectorConstant(FC, dl);
2533 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2534 return OpRef::res(Results.top());
2535 }
2536
2537 // Try reverse delta.
2538 ReverseDeltaNetwork RN(SM.Mask);
2539 if (RN.run(RC)) {
2540 SDValue Ctl = getVectorConstant(RC, dl);
2541 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2542 return OpRef::res(Results.top());
2543 }
2544
2545 // Do Benes.
2546 BenesNetwork BN(SM.Mask);
2547 if (BN.run(FC, RC)) {
2548 SDValue CtlF = getVectorConstant(FC, dl);
2549 SDValue CtlR = getVectorConstant(RC, dl);
2550 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2551 Results.push(Hexagon::V6_vrdelta, ResTy,
2552 {OpRef::res(-1), OpRef(CtlR)});
2553 return OpRef::res(Results.top());
2554 }
2555
2556 return OpRef::fail();
2557}
2558
2559SDValue HvxSelector::getConst32(int Val, const SDLoc &dl) {
2560 return DAG.getTargetConstant(Val, dl, MVT::i32);
2561}
2562
2563SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2564 const SDLoc &dl) {
2566 for (uint8_t C : Data)
2567 Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2568 MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2569 SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2570 SDValue LV = Lower.LowerOperation(BV, DAG);
2572 return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2573}
2574
2576 SDValue Inp = N->getOperand(0);
2577 MVT ResTy = N->getValueType(0).getSimpleVT();
2578 auto IdxN = cast<ConstantSDNode>(N->getOperand(1));
2579 unsigned Idx = IdxN->getZExtValue();
2580
2581 [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
2582 [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
2584 assert(2 * ResLen == InpTy.getVectorNumElements());
2585 assert(Idx == 0 || Idx == ResLen);
2586
2587 unsigned SubReg = Idx == 0 ? Hexagon::vsub_lo : Hexagon::vsub_hi;
2588 SDValue Ext = DAG.getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
2589
2590 ISel.ReplaceNode(N, Ext.getNode());
2591}
2592
2594 DEBUG_WITH_TYPE("isel", {
2595 dbgs() << "Starting " << __func__ << " on node:\n";
2596 N->dump(&DAG);
2597 });
2598 MVT ResTy = N->getValueType(0).getSimpleVT();
2599 // Assume that vector shuffles operate on vectors of bytes.
2600 assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2601
2602 auto *SN = cast<ShuffleVectorSDNode>(N);
2603 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2604 // This shouldn't really be necessary. Is it?
2605 for (int &Idx : Mask)
2606 if (Idx != -1 && Idx < 0)
2607 Idx = -1;
2608
2609 unsigned VecLen = Mask.size();
2610 bool HavePairs = (2*HwLen == VecLen);
2611 assert(ResTy.getSizeInBits() / 8 == VecLen);
2612
2613 // Vd = vector_shuffle Va, Vb, Mask
2614 //
2615
2616 bool UseLeft = false, UseRight = false;
2617 for (unsigned I = 0; I != VecLen; ++I) {
2618 if (Mask[I] == -1)
2619 continue;
2620 unsigned Idx = Mask[I];
2621 assert(Idx < 2*VecLen);
2622 if (Idx < VecLen)
2623 UseLeft = true;
2624 else
2625 UseRight = true;
2626 }
2627
2628 DEBUG_WITH_TYPE("isel", {
2629 dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2630 << UseLeft << " UseRight=" << UseRight << " HavePairs="
2631 << HavePairs << '\n';
2632 });
2633 // If the mask is all -1's, generate "undef".
2634 if (!UseLeft && !UseRight) {
2635 ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2636 return;
2637 }
2638
2639 SDValue Vec0 = N->getOperand(0);
2640 SDValue Vec1 = N->getOperand(1);
2641 assert(Vec0.getValueType() == ResTy && Vec1.getValueType() == ResTy);
2642
2643 ResultStack Results(SN);
2644 OpRef Va = OpRef::undef(ResTy);
2645 OpRef Vb = OpRef::undef(ResTy);
2646
2647 if (!Vec0.isUndef()) {
2648 Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2649 Va = OpRef::OpRef::res(Results.top());
2650 }
2651 if (!Vec1.isUndef()) {
2652 Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2653 Vb = OpRef::res(Results.top());
2654 }
2655
2656 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2657 : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2658
2659 bool Done = Res.isValid();
2660 if (Done) {
2661 // Make sure that Res is on the stack before materializing.
2662 Results.push(TargetOpcode::COPY, ResTy, {Res});
2663 materialize(Results);
2664 } else {
2665 Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2666 }
2667
2668 if (!Done) {
2669#ifndef NDEBUG
2670 dbgs() << "Unhandled shuffle:\n";
2671 SN->dumpr(&DAG);
2672#endif
2673 llvm_unreachable("Failed to select vector shuffle");
2674 }
2675}
2676
2678 // If this is a rotation by less than 8, use V6_valignbi.
2679 MVT Ty = N->getValueType(0).getSimpleVT();
2680 const SDLoc &dl(N);
2681 SDValue VecV = N->getOperand(0);
2682 SDValue RotV = N->getOperand(1);
2683 SDNode *NewN = nullptr;
2684
2685 if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2686 unsigned S = CN->getZExtValue() % HST.getVectorLength();
2687 if (S == 0) {
2688 NewN = VecV.getNode();
2689 } else if (isUInt<3>(S)) {
2690 NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2691 {VecV, VecV, getConst32(S, dl)});
2692 }
2693 }
2694
2695 if (!NewN)
2696 NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2697
2698 ISel.ReplaceNode(N, NewN);
2699}
2700
2702 SDValue Vv = N->getOperand(0);
2703 SDValue Vu = N->getOperand(1);
2704 SDValue Rt = N->getOperand(2);
2705 SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2706 N->getValueType(0), {Vv, Vu, Rt});
2707 ISel.ReplaceNode(N, NewN);
2709}
2710
2711void HexagonDAGToDAGISel::PreprocessHvxISelDAG() {
2712 auto getNodes = [this]() -> std::vector<SDNode *> {
2713 std::vector<SDNode *> T;
2714 T.reserve(CurDAG->allnodes_size());
2715 for (SDNode &N : CurDAG->allnodes())
2716 T.push_back(&N);
2717 return T;
2718 };
2719
2720 ppHvxShuffleOfShuffle(getNodes());
2721}
2722
2723template <> struct std::hash<SDValue> {
2724 std::size_t operator()(SDValue V) const {
2725 return std::hash<const void *>()(V.getNode()) +
2726 std::hash<unsigned>()(V.getResNo());
2727 };
2728};
2729
2730void HexagonDAGToDAGISel::ppHvxShuffleOfShuffle(std::vector<SDNode *> &&Nodes) {
2731 // Motivating case:
2732 // t10: v64i32 = ...
2733 // t46: v128i8 = vector_shuffle<...> t44, t45
2734 // t48: v128i8 = vector_shuffle<...> t44, t45
2735 // t42: v128i8 = vector_shuffle<...> t46, t48
2736 // t12: v32i32 = extract_subvector t10, Constant:i32<0>
2737 // t44: v128i8 = bitcast t12
2738 // t15: v32i32 = extract_subvector t10, Constant:i32<32>
2739 // t45: v128i8 = bitcast t15
2740 SelectionDAG &DAG = *CurDAG;
2741 unsigned HwLen = HST->getVectorLength();
2742
2743 struct SubVectorInfo {
2744 SubVectorInfo(SDValue S, unsigned H) : Src(S), HalfIdx(H) {}
2745 SDValue Src;
2746 unsigned HalfIdx;
2747 };
2748
2749 using MapType = std::unordered_map<SDValue, unsigned>;
2750
2751 auto getMaskElt = [&](unsigned Idx, ShuffleVectorSDNode *Shuff0,
2752 ShuffleVectorSDNode *Shuff1,
2753 const MapType &OpMap) -> int {
2754 // Treat Shuff0 and Shuff1 as operands to another vector shuffle, and
2755 // Idx as a (non-undef) element of the top level shuffle's mask, that
2756 // is, index into concat(Shuff0, Shuff1).
2757 // Assuming that Shuff0 and Shuff1 both operate on subvectors of the
2758 // same source vector (as described by OpMap), return the index of
2759 // that source vector corresponding to Idx.
2760 ShuffleVectorSDNode *OpShuff = Idx < HwLen ? Shuff0 : Shuff1;
2761 if (Idx >= HwLen)
2762 Idx -= HwLen;
2763
2764 // Get the mask index that M points at in the corresponding operand.
2765 int MaybeN = OpShuff->getMaskElt(Idx);
2766 if (MaybeN < 0)
2767 return -1;
2768
2769 auto N = static_cast<unsigned>(MaybeN);
2770 unsigned SrcBase = N < HwLen ? OpMap.at(OpShuff->getOperand(0))
2771 : OpMap.at(OpShuff->getOperand(1));
2772 if (N >= HwLen)
2773 N -= HwLen;
2774
2775 return N + SrcBase;
2776 };
2777
2778 auto fold3 = [&](SDValue TopShuff, SDValue Inp, MapType &&OpMap) -> SDValue {
2779 // Fold all 3 shuffles into a single one.
2780 auto *This = cast<ShuffleVectorSDNode>(TopShuff);
2781 auto *S0 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(0));
2782 auto *S1 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(1));
2783 ArrayRef<int> TopMask = This->getMask();
2784 // This should be guaranteed by type checks in the caller, and the fact
2785 // that all shuffles should have been promoted to operate on MVT::i8.
2786 assert(TopMask.size() == S0->getMask().size() &&
2787 TopMask.size() == S1->getMask().size());
2788 assert(TopMask.size() == HwLen);
2789
2790 SmallVector<int, 256> FoldedMask(2 * HwLen);
2791 for (unsigned I = 0; I != HwLen; ++I) {
2792 int MaybeM = TopMask[I];
2793 if (MaybeM >= 0) {
2794 FoldedMask[I] =
2795 getMaskElt(static_cast<unsigned>(MaybeM), S0, S1, OpMap);
2796 } else {
2797 FoldedMask[I] = -1;
2798 }
2799 }
2800 // The second half of the result will be all-undef.
2801 std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1);
2802
2803 // Return
2804 // FoldedShuffle = (Shuffle Inp, undef, FoldedMask)
2805 // (LoHalf FoldedShuffle)
2806 const SDLoc &dl(TopShuff);
2807 MVT SingleTy = MVT::getVectorVT(MVT::i8, HwLen);
2808 MVT PairTy = MVT::getVectorVT(MVT::i8, 2 * HwLen);
2809 SDValue FoldedShuff =
2810 DAG.getVectorShuffle(PairTy, dl, DAG.getBitcast(PairTy, Inp),
2811 DAG.getUNDEF(PairTy), FoldedMask);
2812 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SingleTy, FoldedShuff,
2813 DAG.getConstant(0, dl, MVT::i32));
2814 };
2815
2816 auto getSourceInfo = [](SDValue V) -> std::optional<SubVectorInfo> {
2817 while (V.getOpcode() == ISD::BITCAST)
2818 V = V.getOperand(0);
2819 if (V.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2820 return std::nullopt;
2821 return SubVectorInfo(V.getOperand(0),
2822 !cast<ConstantSDNode>(V.getOperand(1))->isZero());
2823 };
2824
2825 for (SDNode *N : Nodes) {
2826 if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2827 continue;
2828 EVT ResTy = N->getValueType(0);
2829 if (ResTy.getVectorElementType() != MVT::i8)
2830 continue;
2831 if (ResTy.getVectorNumElements() != HwLen)
2832 continue;
2833
2834 SDValue V0 = N->getOperand(0);
2835 SDValue V1 = N->getOperand(1);
2836 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
2837 continue;
2838 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
2839 continue;
2840 if (V0.getValueType() != ResTy || V1.getValueType() != ResTy)
2841 continue;
2842
2843 // Check if all operands of the two operand shuffles are extract_subvectors
2844 // from the same vector pair.
2845 auto V0A = getSourceInfo(V0.getOperand(0));
2846 if (!V0A.has_value())
2847 continue;
2848 auto V0B = getSourceInfo(V0.getOperand(1));
2849 if (!V0B.has_value() || V0B->Src != V0A->Src)
2850 continue;
2851 auto V1A = getSourceInfo(V1.getOperand(0));
2852 if (!V1A.has_value() || V1A->Src != V0A->Src)
2853 continue;
2854 auto V1B = getSourceInfo(V1.getOperand(1));
2855 if (!V1B.has_value() || V1B->Src != V0A->Src)
2856 continue;
2857
2858 // The source must be a pair. This should be guaranteed here,
2859 // but check just in case.
2860 assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen);
2861
2862 MapType OpMap = {
2863 {V0.getOperand(0), V0A->HalfIdx * HwLen},
2864 {V0.getOperand(1), V0B->HalfIdx * HwLen},
2865 {V1.getOperand(0), V1A->HalfIdx * HwLen},
2866 {V1.getOperand(1), V1B->HalfIdx * HwLen},
2867 };
2868 SDValue NewS = fold3(SDValue(N, 0), V0A->Src, std::move(OpMap));
2869 ReplaceNode(N, NewS.getNode());
2870 }
2871}
2872
2873void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode *N) {
2874 HvxSelector(*this, *CurDAG).selectExtractSubvector(N);
2875}
2876
2877void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2878 HvxSelector(*this, *CurDAG).selectShuffle(N);
2879}
2880
2881void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2882 HvxSelector(*this, *CurDAG).selectRor(N);
2883}
2884
2885void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2886 HvxSelector(*this, *CurDAG).selectVAlign(N);
2887}
2888
2890 const SDLoc &dl(N);
2891 SDValue Chain = N->getOperand(0);
2892 SDValue Address = N->getOperand(2);
2893 SDValue Predicate = N->getOperand(3);
2894 SDValue Base = N->getOperand(4);
2895 SDValue Modifier = N->getOperand(5);
2896 SDValue Offset = N->getOperand(6);
2897 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2898
2899 unsigned Opcode;
2900 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2901 switch (IntNo) {
2902 default:
2903 llvm_unreachable("Unexpected HVX gather intrinsic.");
2904 case Intrinsic::hexagon_V6_vgathermhq:
2905 case Intrinsic::hexagon_V6_vgathermhq_128B:
2906 Opcode = Hexagon::V6_vgathermhq_pseudo;
2907 break;
2908 case Intrinsic::hexagon_V6_vgathermwq:
2909 case Intrinsic::hexagon_V6_vgathermwq_128B:
2910 Opcode = Hexagon::V6_vgathermwq_pseudo;
2911 break;
2912 case Intrinsic::hexagon_V6_vgathermhwq:
2913 case Intrinsic::hexagon_V6_vgathermhwq_128B:
2914 Opcode = Hexagon::V6_vgathermhwq_pseudo;
2915 break;
2916 }
2917
2918 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2919 SDValue Ops[] = { Address, ImmOperand,
2920 Predicate, Base, Modifier, Offset, Chain };
2921 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2922
2923 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2924 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2925
2926 ReplaceNode(N, Result);
2927}
2928
2930 const SDLoc &dl(N);
2931 SDValue Chain = N->getOperand(0);
2932 SDValue Address = N->getOperand(2);
2933 SDValue Base = N->getOperand(3);
2934 SDValue Modifier = N->getOperand(4);
2935 SDValue Offset = N->getOperand(5);
2936 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2937
2938 unsigned Opcode;
2939 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2940 switch (IntNo) {
2941 default:
2942 llvm_unreachable("Unexpected HVX gather intrinsic.");
2943 case Intrinsic::hexagon_V6_vgathermh:
2944 case Intrinsic::hexagon_V6_vgathermh_128B:
2945 Opcode = Hexagon::V6_vgathermh_pseudo;
2946 break;
2947 case Intrinsic::hexagon_V6_vgathermw:
2948 case Intrinsic::hexagon_V6_vgathermw_128B:
2949 Opcode = Hexagon::V6_vgathermw_pseudo;
2950 break;
2951 case Intrinsic::hexagon_V6_vgathermhw:
2952 case Intrinsic::hexagon_V6_vgathermhw_128B:
2953 Opcode = Hexagon::V6_vgathermhw_pseudo;
2954 break;
2955 }
2956
2957 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2958 SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
2959 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2960
2961 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2962 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2963
2964 ReplaceNode(N, Result);
2965}
2966
2968 unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2969 SDNode *Result;
2970 switch (IID) {
2971 case Intrinsic::hexagon_V6_vaddcarry: {
2972 std::array<SDValue, 3> Ops = {
2973 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2974 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2975 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2976 break;
2977 }
2978 case Intrinsic::hexagon_V6_vaddcarry_128B: {
2979 std::array<SDValue, 3> Ops = {
2980 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2981 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2982 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2983 break;
2984 }
2985 case Intrinsic::hexagon_V6_vsubcarry: {
2986 std::array<SDValue, 3> Ops = {
2987 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2988 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2989 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2990 break;
2991 }
2992 case Intrinsic::hexagon_V6_vsubcarry_128B: {
2993 std::array<SDValue, 3> Ops = {
2994 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2995 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2996 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2997 break;
2998 }
2999 default:
3000 llvm_unreachable("Unexpected HVX dual output intrinsic.");
3001 }
3002 ReplaceUses(N, Result);
3003 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3004 ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
3006}
unsigned SubReg
ReachingDefAnalysis InstSet InstSet & Ignore
Function Alias Analysis Results
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
This file implements the BitVector class.
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:510
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:184
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
uint64_t Size
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
static bool isIdentity(ArrayRef< int > Mask)
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
static bool isUndef(ArrayRef< int > Mask)
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
static bool isLowHalfOnly(ArrayRef< int > Mask)
static LVOptions Options
Definition: LVOptions.cpp:25
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
#define H(x, y, z)
Definition: MD5.cpp:57
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
nvptx lower args
#define P(N)
const NodeList & List
Definition: RDFGraph.cpp:201
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
This file implements a set that has insertion order iteration characteristics.
Stack Slot Coloring
@ None
xray Insert XRay ops
static constexpr uint32_t Opcode
Definition: aarch32.h:200
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:228
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
iterator end() const
Definition: ArrayRef.h:154
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
iterator begin() const
Definition: ArrayRef.h:153
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:235
const T * data() const
Definition: ArrayRef.h:162
BitVector & set()
Definition: BitVector.h:351
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:140
This class represents an Operation in the Expression.
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:155
iterator end()
Definition: DenseMap.h:84
Tagged union holding either a T or a Error.
Definition: Error.h:474
bool empty() const
Definition: Function.h:800
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4649
size_t size() const
Definition: Function.h:799
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
unsigned getVectorLength() const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Machine Value Type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
A description of a memory reference used in the backend.
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
MutableArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition: ArrayRef.h:419
iterator begin() const
Definition: ArrayRef.h:356
MutableArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:412
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
iterator begin()
bool insert(SUnit *SU)
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:94
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
iterator_range< use_iterator > uses()
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
const SDValue & getOperand(unsigned Num) const
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:826
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand * > NewMemRefs)
Mutate the specified machine node's memory references to the provided list.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:543
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:539
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:674
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
A vector that has set insertion semantics.
Definition: SetVector.h:57
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:98
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition: SetVector.h:87
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:264
bool empty() const
Determine if the SetVector is empty or not.
Definition: SetVector.h:93
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:162
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
void resize(size_type N)
Definition: SmallVector.h:642
void push_back(const T &Elt)
Definition: SmallVector.h:416
pointer data()
Return a pointer to the vector's buffer, even if empty().
Definition: SmallVector.h:289
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
TargetInstrInfo - Interface to description of machine instruction set.
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:543
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:600
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:573
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ Invalid
Invalid file type.
Definition: InterfaceFile.h:55
@ Undef
Value of the register doesn't matter.
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
constexpr double e
Definition: MathExtras.h:31
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:228
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:237
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition: DWP.cpp:440
@ Length
Definition: DWP.cpp:440
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1726
int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition: bit.h:384
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1684
@ Done
Definition: Threading.h:61
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition: STLExtras.h:2028
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:313
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:264
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1651
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:125
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
@ Invalid
Denotes invalid value.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:436
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition: STLExtras.h:1925
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1883
MaskT vshuffvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
MaskT vpack(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > hi(ArrayRef< int > Vuu)
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdealb4w(ArrayRef< int > Vu, ArrayRef< int > Vv)
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > lo(ArrayRef< int > Vuu)
MaskT vdealvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
Extended Value Type.
Definition: ValueTypes.h:34
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:299
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:311
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:319
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
MVT getSingleVT(MVT ElemTy) const
static SmallVector< uint32_t, 8 > completeToPerfect(ArrayRef< uint32_t > Completions, unsigned Width)
HexagonDAGToDAGISel & ISel
const HexagonTargetLowering & Lower
void selectExtractSubvector(SDNode *N)
static SmallVector< uint32_t, 8 > getPerfectCompletions(ShuffleMask SM, unsigned Width)
MVT getPairVT(MVT ElemTy) const
const HexagonSubtarget & HST
static std::optional< int > rotationDistance(ShuffleMask SM, unsigned WrapAt)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
std::size_t operator()(SDValue V) const