LLVM 23.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1//===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
10#include "HexagonISelLowering.h"
11#include "llvm/ADT/BitVector.h"
12#include "llvm/ADT/SetVector.h"
14#include "llvm/IR/Intrinsics.h"
15#include "llvm/IR/IntrinsicsHexagon.h"
16#include "llvm/Support/Debug.h"
18
19#include <algorithm>
20#include <cmath>
21#include <deque>
22#include <functional>
23#include <map>
24#include <optional>
25#include <set>
26#include <utility>
27#include <vector>
28
29#define DEBUG_TYPE "hexagon-isel"
30using namespace llvm;
31
32namespace {
33
34// --------------------------------------------------------------------
35// Implementation of permutation networks.
36
37// Implementation of the node routing through butterfly networks:
38// - Forward delta.
39// - Reverse delta.
40// - Benes.
41//
42//
43// Forward delta network consists of log(N) steps, where N is the number
44// of inputs. In each step, an input can stay in place, or it can get
45// routed to another position[1]. The step after that consists of two
46// networks, each half in size in terms of the number of nodes. In those
47// terms, in the given step, an input can go to either the upper or the
48// lower network in the next step.
49//
50// [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
51// positions as long as there is no conflict.
52
53// Here's a delta network for 8 inputs, only the switching routes are
54// shown:
55//
56// Steps:
57// |- 1 ---------------|- 2 -----|- 3 -|
58//
59// Inp[0] *** *** *** *** Out[0]
60// \ / \ / \ /
61// \ / \ / X
62// \ / \ / / \
63// Inp[1] *** \ / *** X *** *** Out[1]
64// \ \ / / \ / \ /
65// \ \ / / X X
66// \ \ / / / \ / \
67// Inp[2] *** \ \ / / *** X *** *** Out[2]
68// \ \ X / / / \ \ /
69// \ \ / \ / / / \ X
70// \ X X / / \ / \
71// Inp[3] *** \ / \ / \ / *** *** *** Out[3]
72// \ X X X /
73// \ / \ / \ / \ /
74// X X X X
75// / \ / \ / \ / \
76// / X X X \
77// Inp[4] *** / \ / \ / \ *** *** *** Out[4]
78// / X X \ \ / \ /
79// / / \ / \ \ \ / X
80// / / X \ \ \ / / \
81// Inp[5] *** / / \ \ *** X *** *** Out[5]
82// / / \ \ \ / \ /
83// / / \ \ X X
84// / / \ \ / \ / \
85// Inp[6] *** / \ *** X *** *** Out[6]
86// / \ / \ \ /
87// / \ / \ X
88// / \ / \ / \
89// Inp[7] *** *** *** *** Out[7]
90//
91//
92// Reverse delta network is same as delta network, with the steps in
93// the opposite order.
94//
95//
96// Benes network is a forward delta network immediately followed by
97// a reverse delta network.
98
99enum class ColorKind { None, Red, Black };
100
101// Graph coloring utility used to partition nodes into two groups:
102// they will correspond to nodes routed to the upper and lower networks.
103struct Coloring {
104 using Node = int;
105 using MapType = std::map<Node, ColorKind>;
106 static constexpr Node Ignore = Node(-1);
107
108 Coloring(ArrayRef<Node> Ord) : Order(Ord) {
109 build();
110 if (!color())
111 Colors.clear();
112 }
113
114 const MapType &colors() const {
115 return Colors;
116 }
117
118 ColorKind other(ColorKind Color) {
119 if (Color == ColorKind::None)
120 return ColorKind::Red;
121 return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
122 }
123
124 LLVM_DUMP_METHOD void dump() const;
125
126private:
127 ArrayRef<Node> Order;
128 MapType Colors;
129 std::set<Node> Needed;
130
131 using NodeSet = std::set<Node>;
132 std::map<Node,NodeSet> Edges;
133
134 Node conj(Node Pos) {
135 Node Num = Order.size();
136 return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
137 }
138
139 ColorKind getColor(Node N) {
140 auto F = Colors.find(N);
141 return F != Colors.end() ? F->second : ColorKind::None;
142 }
143
144 std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
145
146 void build();
147 bool color();
148};
149} // namespace
150
151std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
152 auto Color = ColorKind::None;
153 for (Node N : Nodes) {
154 ColorKind ColorN = getColor(N);
155 if (ColorN == ColorKind::None)
156 continue;
157 if (Color == ColorKind::None)
158 Color = ColorN;
159 else if (Color != ColorKind::None && Color != ColorN)
160 return { false, ColorKind::None };
161 }
162 return { true, Color };
163}
164
165void Coloring::build() {
166 // Add Order[P] and Order[conj(P)] to Edges.
167 for (unsigned P = 0; P != Order.size(); ++P) {
168 Node I = Order[P];
169 if (I != Ignore) {
170 Needed.insert(I);
171 Node PC = Order[conj(P)];
172 if (PC != Ignore && PC != I)
173 Edges[I].insert(PC);
174 }
175 }
176 // Add I and conj(I) to Edges.
177 for (unsigned I = 0; I != Order.size(); ++I) {
178 if (!Needed.count(I))
179 continue;
180 Node C = conj(I);
181 // This will create an entry in the edge table, even if I is not
182 // connected to any other node. This is necessary, because it still
183 // needs to be colored.
184 NodeSet &Is = Edges[I];
185 if (Needed.count(C))
186 Is.insert(C);
187 }
188}
189
190bool Coloring::color() {
191 SetVector<Node> FirstQ;
192 auto Enqueue = [this,&FirstQ] (Node N) {
193 SetVector<Node> Q;
194 Q.insert(N);
195 for (unsigned I = 0; I != Q.size(); ++I) {
196 NodeSet &Ns = Edges[Q[I]];
197 Q.insert_range(Ns);
198 }
199 FirstQ.insert_range(Q);
200 };
201 for (Node N : Needed)
202 Enqueue(N);
203
204 for (Node N : FirstQ) {
205 if (Colors.count(N))
206 continue;
207 NodeSet &Ns = Edges[N];
208 auto P = getUniqueColor(Ns);
209 if (!P.first)
210 return false;
211 Colors[N] = other(P.second);
212 }
213
214 // First, color nodes that don't have any dups.
215 for (auto E : Edges) {
216 Node N = E.first;
217 if (!Needed.count(conj(N)) || Colors.count(N))
218 continue;
219 auto P = getUniqueColor(E.second);
220 if (!P.first)
221 return false;
222 Colors[N] = other(P.second);
223 }
224
225 // Now, nodes that are still uncolored. Since the graph can be modified
226 // in this step, create a work queue.
227 std::vector<Node> WorkQ;
228 for (auto E : Edges) {
229 Node N = E.first;
230 if (!Colors.count(N))
231 WorkQ.push_back(N);
232 }
233
234 for (Node N : WorkQ) {
235 NodeSet &Ns = Edges[N];
236 auto P = getUniqueColor(Ns);
237 if (P.first) {
238 Colors[N] = other(P.second);
239 continue;
240 }
241
242 // Coloring failed. Split this node.
243 Node C = conj(N);
244 ColorKind ColorN = other(ColorKind::None);
245 ColorKind ColorC = other(ColorN);
246 NodeSet &Cs = Edges[C];
247 NodeSet CopyNs = Ns;
248 for (Node M : CopyNs) {
249 ColorKind ColorM = getColor(M);
250 if (ColorM == ColorC) {
251 // Connect M with C, disconnect M from N.
252 Cs.insert(M);
253 Edges[M].insert(C);
254 Ns.erase(M);
255 Edges[M].erase(N);
256 }
257 }
258 Colors[N] = ColorN;
259 Colors[C] = ColorC;
260 }
261
262 // Explicitly assign "None" to all uncolored nodes.
263 for (unsigned I = 0; I != Order.size(); ++I)
264 Colors.try_emplace(I, ColorKind::None);
265
266 return true;
267}
268
269#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
270void Coloring::dump() const {
271 dbgs() << "{ Order: {";
272 for (Node P : Order) {
273 if (P != Ignore)
274 dbgs() << ' ' << P;
275 else
276 dbgs() << " -";
277 }
278 dbgs() << " }\n";
279 dbgs() << " Needed: {";
280 for (Node N : Needed)
281 dbgs() << ' ' << N;
282 dbgs() << " }\n";
283
284 dbgs() << " Edges: {\n";
285 for (auto E : Edges) {
286 dbgs() << " " << E.first << " -> {";
287 for (auto N : E.second)
288 dbgs() << ' ' << N;
289 dbgs() << " }\n";
290 }
291 dbgs() << " }\n";
292
293 auto ColorKindToName = [](ColorKind C) {
294 switch (C) {
295 case ColorKind::None:
296 return "None";
297 case ColorKind::Red:
298 return "Red";
299 case ColorKind::Black:
300 return "Black";
301 }
302 llvm_unreachable("all ColorKinds should be handled by the switch above");
303 };
304
305 dbgs() << " Colors: {\n";
306 for (auto C : Colors)
307 dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
308 dbgs() << " }\n}\n";
309}
310#endif
311
312namespace {
313// Base class of for reordering networks. They don't strictly need to be
314// permutations, as outputs with repeated occurrences of an input element
315// are allowed.
316struct PermNetwork {
317 using Controls = std::vector<uint8_t>;
318 using ElemType = int;
319 static constexpr ElemType Ignore = ElemType(-1);
320
321 enum : uint8_t {
322 None,
323 Pass,
324 Switch
325 };
326 enum : uint8_t {
327 Forward,
328 Reverse
329 };
330
331 PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
332 Order.assign(Ord.data(), Ord.data()+Ord.size());
333 Log = 0;
334
335 unsigned S = Order.size();
336 while (S >>= 1)
337 ++Log;
338
339 Table.resize(Order.size());
340 for (RowType &Row : Table)
341 Row.resize(Mult*Log, None);
342 }
343
344 void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
345 unsigned Size = Order.size();
346 V.resize(Size);
347 for (unsigned I = 0; I != Size; ++I) {
348 unsigned W = 0;
349 for (unsigned L = 0; L != Log; ++L) {
350 unsigned C = ctl(I, StartAt+L) == Switch;
351 if (Dir == Forward)
352 W |= C << (Log-1-L);
353 else
354 W |= C << L;
355 }
356 assert(isUInt<8>(W));
357 V[I] = uint8_t(W);
358 }
359 }
360
361 uint8_t ctl(ElemType Pos, unsigned Step) const {
362 return Table[Pos][Step];
363 }
364 unsigned size() const {
365 return Order.size();
366 }
367 unsigned steps() const {
368 return Log;
369 }
370
371protected:
372 unsigned Log;
373 std::vector<ElemType> Order;
374 using RowType = std::vector<uint8_t>;
375 std::vector<RowType> Table;
376};
377
378struct ForwardDeltaNetwork : public PermNetwork {
379 ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
380
381 bool run(Controls &V) {
382 if (!route(Order.data(), Table.data(), size(), 0))
383 return false;
384 getControls(V, 0, Forward);
385 return true;
386 }
387
388private:
389 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
390};
391
392struct ReverseDeltaNetwork : public PermNetwork {
393 ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
394
395 bool run(Controls &V) {
396 if (!route(Order.data(), Table.data(), size(), 0))
397 return false;
398 getControls(V, 0, Reverse);
399 return true;
400 }
401
402private:
403 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
404};
405
406struct BenesNetwork : public PermNetwork {
407 BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
408
409 bool run(Controls &F, Controls &R) {
410 if (!route(Order.data(), Table.data(), size(), 0))
411 return false;
412
413 getControls(F, 0, Forward);
414 getControls(R, Log, Reverse);
415 return true;
416 }
417
418private:
419 bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
420};
421} // namespace
422
423bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
424 unsigned Step) {
425 bool UseUp = false, UseDown = false;
426 ElemType Num = Size;
427
428 // Cannot use coloring here, because coloring is used to determine
429 // the "big" switch, i.e. the one that changes halves, and in a forward
430 // network, a color can be simultaneously routed to both halves in the
431 // step we're working on.
432 for (ElemType J = 0; J != Num; ++J) {
433 ElemType I = P[J];
434 // I is the position in the input,
435 // J is the position in the output.
436 if (I == Ignore)
437 continue;
438 uint8_t S;
439 if (I < Num/2)
440 S = (J < Num/2) ? Pass : Switch;
441 else
442 S = (J < Num/2) ? Switch : Pass;
443
444 // U is the element in the table that needs to be updated.
445 ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
446 if (U < Num/2)
447 UseUp = true;
448 else
449 UseDown = true;
450 if (T[U][Step] != S && T[U][Step] != None)
451 return false;
452 T[U][Step] = S;
453 }
454
455 for (ElemType J = 0; J != Num; ++J)
456 if (P[J] != Ignore && P[J] >= Num/2)
457 P[J] -= Num/2;
458
459 if (Step+1 < Log) {
460 if (UseUp && !route(P, T, Size/2, Step+1))
461 return false;
462 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
463 return false;
464 }
465 return true;
466}
467
468bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
469 unsigned Step) {
470 unsigned Pets = Log-1 - Step;
471 bool UseUp = false, UseDown = false;
472 ElemType Num = Size;
473
474 // In this step half-switching occurs, so coloring can be used.
475 Coloring G({P,Size});
476 const Coloring::MapType &M = G.colors();
477 if (M.empty())
478 return false;
479
480 ColorKind ColorUp = ColorKind::None;
481 for (ElemType J = 0; J != Num; ++J) {
482 ElemType I = P[J];
483 // I is the position in the input,
484 // J is the position in the output.
485 if (I == Ignore)
486 continue;
487 ColorKind C = M.at(I);
488 if (C == ColorKind::None)
489 continue;
490 // During "Step", inputs cannot switch halves, so if the "up" color
491 // is still unknown, make sure that it is selected in such a way that
492 // "I" will stay in the same half.
493 bool InpUp = I < Num/2;
494 if (ColorUp == ColorKind::None)
495 ColorUp = InpUp ? C : G.other(C);
496 if ((C == ColorUp) != InpUp) {
497 // If I should go to a different half than where is it now, give up.
498 return false;
499 }
500
501 uint8_t S;
502 if (InpUp) {
503 S = (J < Num/2) ? Pass : Switch;
504 UseUp = true;
505 } else {
506 S = (J < Num/2) ? Switch : Pass;
507 UseDown = true;
508 }
509 T[J][Pets] = S;
510 }
511
512 // Reorder the working permutation according to the computed switch table
513 // for the last step (i.e. Pets).
514 for (ElemType J = 0, E = Size / 2; J != E; ++J) {
515 ElemType PJ = P[J]; // Current values of P[J]
516 ElemType PC = P[J+Size/2]; // and P[conj(J)]
517 ElemType QJ = PJ; // New values of P[J]
518 ElemType QC = PC; // and P[conj(J)]
519 if (T[J][Pets] == Switch)
520 QC = PJ;
521 if (T[J+Size/2][Pets] == Switch)
522 QJ = PC;
523 P[J] = QJ;
524 P[J+Size/2] = QC;
525 }
526
527 for (ElemType J = 0; J != Num; ++J)
528 if (P[J] != Ignore && P[J] >= Num/2)
529 P[J] -= Num/2;
530
531 if (Step+1 < Log) {
532 if (UseUp && !route(P, T, Size/2, Step+1))
533 return false;
534 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
535 return false;
536 }
537 return true;
538}
539
540bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
541 unsigned Step) {
542 Coloring G({P,Size});
543 const Coloring::MapType &M = G.colors();
544 if (M.empty())
545 return false;
546 ElemType Num = Size;
547
548 unsigned Pets = 2*Log-1 - Step;
549 bool UseUp = false, UseDown = false;
550
551 // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
552 // result in different controls. Let's pick the one where the first
553 // control will be "Pass".
554 ColorKind ColorUp = ColorKind::None;
555 for (ElemType J = 0; J != Num; ++J) {
556 ElemType I = P[J];
557 if (I == Ignore)
558 continue;
559 ColorKind C = M.at(I);
560 if (C == ColorKind::None)
561 continue;
562 if (ColorUp == ColorKind::None) {
563 ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
564 }
565 unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
566 if (C == ColorUp) {
567 if (I < Num/2)
568 T[I][Step] = Pass;
569 else
570 T[CI][Step] = Switch;
571 T[J][Pets] = (J < Num/2) ? Pass : Switch;
572 UseUp = true;
573 } else { // Down
574 if (I < Num/2)
575 T[CI][Step] = Switch;
576 else
577 T[I][Step] = Pass;
578 T[J][Pets] = (J < Num/2) ? Switch : Pass;
579 UseDown = true;
580 }
581 }
582
583 // Reorder the working permutation according to the computed switch table
584 // for the last step (i.e. Pets).
585 for (ElemType J = 0; J != Num/2; ++J) {
586 ElemType PJ = P[J]; // Current values of P[J]
587 ElemType PC = P[J+Num/2]; // and P[conj(J)]
588 ElemType QJ = PJ; // New values of P[J]
589 ElemType QC = PC; // and P[conj(J)]
590 if (T[J][Pets] == Switch)
591 QC = PJ;
592 if (T[J+Num/2][Pets] == Switch)
593 QJ = PC;
594 P[J] = QJ;
595 P[J+Num/2] = QC;
596 }
597
598 for (ElemType J = 0; J != Num; ++J)
599 if (P[J] != Ignore && P[J] >= Num/2)
600 P[J] -= Num/2;
601
602 if (Step+1 < Log) {
603 if (UseUp && !route(P, T, Size/2, Step+1))
604 return false;
605 if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
606 return false;
607 }
608 return true;
609}
610
611// --------------------------------------------------------------------
612// Support for building selection results (output instructions that are
613// parts of the final selection).
614
615namespace {
616struct OpRef {
617 OpRef(SDValue V) : OpV(V) {}
618 bool isValue() const { return OpV.getNode() != nullptr; }
619 bool isValid() const { return isValue() || !(OpN & Invalid); }
620 bool isUndef() const { return OpN & Undef; }
621 static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
622 static OpRef fail() { return OpRef(Invalid); }
623
624 static OpRef lo(const OpRef &R) {
625 assert(!R.isValue());
626 return OpRef(R.OpN & (Undef | Index | LoHalf));
627 }
628 static OpRef hi(const OpRef &R) {
629 assert(!R.isValue());
630 return OpRef(R.OpN & (Undef | Index | HiHalf));
631 }
632 static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
633
634 // Direct value.
635 SDValue OpV = SDValue();
636
637 // Reference to the operand of the input node:
638 // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
639 // operand index:
640 // If bit 30 is set, it's the high half of the operand.
641 // If bit 29 is set, it's the low half of the operand.
642 unsigned OpN = 0;
643
644 enum : unsigned {
645 Invalid = 0x10000000,
646 LoHalf = 0x20000000,
647 HiHalf = 0x40000000,
648 Whole = LoHalf | HiHalf,
649 Undef = 0x80000000,
650 Index = 0x0FFFFFFF, // Mask of the index value.
651 IndexBits = 28,
652 };
653
655 void print(raw_ostream &OS, const SelectionDAG &G) const;
656
657private:
658 OpRef(unsigned N) : OpN(N) {}
659};
660
661struct NodeTemplate {
662 NodeTemplate() = default;
663 unsigned Opc = 0;
664 MVT Ty = MVT::Other;
665 std::vector<OpRef> Ops;
666
667 LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
668};
669
670struct ResultStack {
671 ResultStack(SDNode *Inp)
672 : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
673 SDNode *InpNode;
674 MVT InpTy;
675 unsigned push(const NodeTemplate &Res) {
676 List.push_back(Res);
677 return List.size()-1;
678 }
679 unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
680 NodeTemplate Res;
681 Res.Opc = Opc;
682 Res.Ty = Ty;
683 Res.Ops = Ops;
684 return push(Res);
685 }
686 bool empty() const { return List.empty(); }
687 unsigned size() const { return List.size(); }
688 unsigned top() const { return size()-1; }
689 const NodeTemplate &operator[](unsigned I) const { return List[I]; }
690 unsigned reset(unsigned NewTop) {
691 List.resize(NewTop+1);
692 return NewTop;
693 }
694
695 using BaseType = std::vector<NodeTemplate>;
696 BaseType::iterator begin() { return List.begin(); }
697 BaseType::iterator end() { return List.end(); }
698 BaseType::const_iterator begin() const { return List.begin(); }
699 BaseType::const_iterator end() const { return List.end(); }
700
702
704 void print(raw_ostream &OS, const SelectionDAG &G) const;
705};
706} // namespace
707
708#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
709void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
710 if (isValue()) {
711 OpV.getNode()->print(OS, &G);
712 return;
713 }
714 if (OpN & Invalid) {
715 OS << "invalid";
716 return;
717 }
718 if (OpN & Undef) {
719 OS << "undef";
720 return;
721 }
722 if ((OpN & Whole) != Whole) {
723 assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
724 if (OpN & LoHalf)
725 OS << "lo ";
726 else
727 OS << "hi ";
728 }
729 OS << '#' << SignExtend32(OpN & Index, IndexBits);
730}
731
732void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
733 const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
734 OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
735 << TII.getName(Opc);
736 bool Comma = false;
737 for (const auto &R : Ops) {
738 if (Comma)
739 OS << ',';
740 Comma = true;
741 OS << ' ';
742 R.print(OS, G);
743 }
744}
745
746void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
747 OS << "Input node:\n";
748#ifndef NDEBUG
749 InpNode->dumpr(&G);
750#endif
751 OS << "Result templates:\n";
752 for (unsigned I = 0, E = List.size(); I != E; ++I) {
753 OS << '[' << I << "] ";
754 List[I].print(OS, G);
755 OS << '\n';
756 }
757}
758#endif
759
760namespace {
761struct ShuffleMask {
762 ShuffleMask(ArrayRef<int> M) : Mask(M) {
763 for (int M : Mask) {
764 if (M == -1)
765 continue;
766 MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
767 MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
768 }
769 }
770
771 ArrayRef<int> Mask;
772 int MinSrc = -1, MaxSrc = -1;
773
774 ShuffleMask lo() const {
775 size_t H = Mask.size()/2;
776 return ShuffleMask(Mask.take_front(H));
777 }
778 ShuffleMask hi() const {
779 size_t H = Mask.size()/2;
780 return ShuffleMask(Mask.take_back(H));
781 }
782
783 void print(raw_ostream &OS) const {
784 OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
785 for (int M : Mask)
786 OS << ' ' << M;
787 OS << " }";
788 }
789};
790
791[[maybe_unused]]
792raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
793 SM.print(OS);
794 return OS;
795}
796} // namespace
797
798namespace shuffles {
800// Vdd = vshuffvdd(Vu, Vv, Rt)
801// Vdd = vdealvdd(Vu, Vv, Rt)
802// Vd = vpack(Vu, Vv, Size, TakeOdd)
803// Vd = vshuff(Vu, Vv, Size, TakeOdd)
804// Vd = vdeal(Vu, Vv, Size, TakeOdd)
805// Vd = vdealb4w(Vu, Vv)
806
807ArrayRef<int> lo(ArrayRef<int> Vuu) { return Vuu.take_front(Vuu.size() / 2); }
808ArrayRef<int> hi(ArrayRef<int> Vuu) { return Vuu.take_back(Vuu.size() / 2); }
809
811 int Len = Vu.size();
812 MaskT Vdd(2 * Len);
813 llvm::copy(Vv, Vdd.begin());
814 llvm::copy(Vu, Vdd.begin() + Len);
815
816 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
817 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
818
819 for (int Offset = 1; Offset < Len; Offset *= 2) {
820 if ((Rt & Offset) == 0)
821 continue;
822 for (int i = 0; i != Len; ++i) {
823 if ((i & Offset) == 0)
824 std::swap(Vd1[i], Vd0[i + Offset]);
825 }
826 }
827 return Vdd;
828}
829
831 int Len = Vu.size();
832 MaskT Vdd(2 * Len);
833 llvm::copy(Vv, Vdd.begin());
834 llvm::copy(Vu, Vdd.begin() + Len);
835
836 auto Vd0 = MutableArrayRef<int>(Vdd).take_front(Len);
837 auto Vd1 = MutableArrayRef<int>(Vdd).take_back(Len);
838
839 for (int Offset = Len / 2; Offset > 0; Offset /= 2) {
840 if ((Rt & Offset) == 0)
841 continue;
842 for (int i = 0; i != Len; ++i) {
843 if ((i & Offset) == 0)
844 std::swap(Vd1[i], Vd0[i + Offset]);
845 }
846 }
847 return Vdd;
848}
849
850MaskT vpack(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
851 int Len = Vu.size();
852 MaskT Vd(Len);
853 auto Odd = static_cast<int>(TakeOdd);
854 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
855 for (int b = 0; b != static_cast<int>(Size); ++b) {
856 // clang-format off
857 Vd[i * Size + b] = Vv[(2 * i + Odd) * Size + b];
858 Vd[i * Size + b + Len / 2] = Vu[(2 * i + Odd) * Size + b];
859 // clang-format on
860 }
861 }
862 return Vd;
863}
864
865MaskT vshuff(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
866 int Len = Vu.size();
867 MaskT Vd(Len);
868 auto Odd = static_cast<int>(TakeOdd);
869 for (int i = 0, e = Len / (2 * Size); i != e; ++i) {
870 for (int b = 0; b != static_cast<int>(Size); ++b) {
871 Vd[(2 * i + 0) * Size + b] = Vv[(2 * i + Odd) * Size + b];
872 Vd[(2 * i + 1) * Size + b] = Vu[(2 * i + Odd) * Size + b];
873 }
874 }
875 return Vd;
876}
877
878MaskT vdeal(ArrayRef<int> Vu, ArrayRef<int> Vv, unsigned Size, bool TakeOdd) {
879 int Len = Vu.size();
880 MaskT T = vdealvdd(Vu, Vv, Len - 2 * Size);
881 return vpack(hi(T), lo(T), Size, TakeOdd);
882}
883
885 int Len = Vu.size();
886 MaskT Vd(Len);
887 for (int i = 0, e = Len / 4; i != e; ++i) {
888 Vd[0 * (Len / 4) + i] = Vv[4 * i + 0];
889 Vd[1 * (Len / 4) + i] = Vv[4 * i + 2];
890 Vd[2 * (Len / 4) + i] = Vu[4 * i + 0];
891 Vd[3 * (Len / 4) + i] = Vu[4 * i + 2];
892 }
893 return Vd;
894}
895
896template <typename ShuffFunc, typename... OptArgs>
897auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT {
898 MaskT Vu(Length), Vv(Length);
899 std::iota(Vu.begin(), Vu.end(), Length); // High
900 std::iota(Vv.begin(), Vv.end(), 0); // Low
901 return S(Vu, Vv, args...);
902}
903
904} // namespace shuffles
905
906// --------------------------------------------------------------------
907// The HvxSelector class.
908
910 return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
911}
913 return G.getSubtarget<HexagonSubtarget>();
914}
915
916namespace llvm {
917 struct HvxSelector {
922 const unsigned HwLen;
923
927
928 MVT getSingleVT(MVT ElemTy) const {
929 assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
930 unsigned NumElems = HwLen / (ElemTy.getSizeInBits() / 8);
931 return MVT::getVectorVT(ElemTy, NumElems);
932 }
933
934 MVT getPairVT(MVT ElemTy) const {
935 assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
936 unsigned NumElems = (2 * HwLen) / (ElemTy.getSizeInBits() / 8);
937 return MVT::getVectorVT(ElemTy, NumElems);
938 }
939
940 MVT getBoolVT() const {
941 // Return HwLen x i1.
942 return MVT::getVectorVT(MVT::i1, HwLen);
943 }
944
945 void selectExtractSubvector(SDNode *N);
946 void selectShuffle(SDNode *N);
947 void selectRor(SDNode *N);
948 void selectVAlign(SDNode *N);
949
950 static SmallVector<uint32_t, 8> getPerfectCompletions(ShuffleMask SM,
951 unsigned Width);
952 static SmallVector<uint32_t, 8> completeToPerfect(
953 ArrayRef<uint32_t> Completions, unsigned Width);
954 static std::optional<int> rotationDistance(ShuffleMask SM, unsigned WrapAt);
955
956 private:
957 void select(SDNode *ISelN);
958 void materialize(const ResultStack &Results);
959
960 SDValue getConst32(unsigned Val, const SDLoc &dl);
961 SDValue getSignedConst32(int Val, const SDLoc &dl);
962 SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
963
964 enum : unsigned {
965 None,
966 PackMux,
967 };
968 OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
969 OpRef funnels(OpRef Va, OpRef Vb, int Amount, ResultStack &Results);
970
971 OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
972 MutableArrayRef<int> NewMask, unsigned Options = None);
973 OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
974 MutableArrayRef<int> NewMask);
975 OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
976 ResultStack &Results);
977 OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
978 ResultStack &Results);
979
980 OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
981 OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
982 OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
983 OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
984
985 OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
986 OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
987 OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
988 OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
989
990 bool selectVectorConstants(SDNode *N);
991 bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
992 SDValue Va, SDValue Vb, SDNode *N);
993 };
994} // namespace llvm
995
997 MutableArrayRef<int> MaskR) {
998 unsigned VecLen = Mask.size();
999 assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
1000 for (unsigned I = 0; I != VecLen; ++I) {
1001 int M = Mask[I];
1002 if (M < 0) {
1003 MaskL[I] = MaskR[I] = -1;
1004 } else if (unsigned(M) < VecLen) {
1005 MaskL[I] = M;
1006 MaskR[I] = -1;
1007 } else {
1008 MaskL[I] = -1;
1009 MaskR[I] = M-VecLen;
1010 }
1011 }
1012}
1013
1014static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
1015 unsigned MaxLen) {
1016 assert(A.size() > 0 && A.size() >= MaxLen);
1017 int F = A[0];
1018 int E = F;
1019 for (unsigned I = 1; I != MaxLen; ++I) {
1020 if (A[I] - E != Inc)
1021 return { F, I };
1022 E = A[I];
1023 }
1024 return { F, MaxLen };
1025}
1026
1027static bool isUndef(ArrayRef<int> Mask) {
1028 for (int Idx : Mask)
1029 if (Idx != -1)
1030 return false;
1031 return true;
1032}
1033
1034static bool isIdentity(ArrayRef<int> Mask) {
1035 for (int I = 0, E = Mask.size(); I != E; ++I) {
1036 int M = Mask[I];
1037 if (M >= 0 && M != I)
1038 return false;
1039 }
1040 return true;
1041}
1042
1043static bool isLowHalfOnly(ArrayRef<int> Mask) {
1044 int L = Mask.size();
1045 assert(L % 2 == 0);
1046 // Check if the second half of the mask is all-undef.
1047 return llvm::all_of(Mask.drop_front(L / 2), [](int M) { return M < 0; });
1048}
1049
1051 unsigned SegLen) {
1052 assert(isPowerOf2_32(SegLen));
1054 if (SM.MaxSrc == -1)
1055 return SegList;
1056
1057 unsigned Shift = Log2_32(SegLen);
1058 BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
1059
1060 for (int M : SM.Mask) {
1061 if (M >= 0)
1062 Segs.set(M >> Shift);
1063 }
1064
1065 llvm::append_range(SegList, Segs.set_bits());
1066 return SegList;
1067}
1068
1070 unsigned SegLen) {
1071 // Calculate the layout of the output segments in terms of the input
1072 // segments.
1073 // For example [1,3,1,0] means that the output consists of 4 output
1074 // segments, where the first output segment has only elements of the
1075 // input segment at index 1. The next output segment only has elements
1076 // of the input segment 3, etc.
1077 // If an output segment only has undef elements, the value will be ~0u.
1078 // If an output segment has elements from more than one input segment,
1079 // the corresponding value will be ~1u.
1080 unsigned MaskLen = SM.Mask.size();
1081 assert(MaskLen % SegLen == 0);
1082 SmallVector<unsigned, 4> Map(MaskLen / SegLen);
1083
1084 for (int S = 0, E = Map.size(); S != E; ++S) {
1085 unsigned Idx = ~0u;
1086 for (int I = 0; I != static_cast<int>(SegLen); ++I) {
1087 int M = SM.Mask[S*SegLen + I];
1088 if (M < 0)
1089 continue;
1090 unsigned G = M / SegLen; // Input segment of this element.
1091 if (Idx == ~0u) {
1092 Idx = G;
1093 } else if (Idx != G) {
1094 Idx = ~1u;
1095 break;
1096 }
1097 }
1098 Map[S] = Idx;
1099 }
1100
1101 return Map;
1102}
1103
1105 unsigned SegLen, MutableArrayRef<int> PackedMask) {
1107 for (int I = OutSegMap.size() - 1; I >= 0; --I) {
1108 unsigned S = OutSegMap[I];
1109 assert(S != ~0u && "Unexpected undef");
1110 assert(S != ~1u && "Unexpected multi");
1111 if (InvMap.size() <= S)
1112 InvMap.resize(S+1);
1113 InvMap[S] = I;
1114 }
1115
1116 unsigned Shift = Log2_32(SegLen);
1117 for (int I = 0, E = Mask.size(); I != E; ++I) {
1118 int M = Mask[I];
1119 if (M >= 0) {
1120 int OutIdx = InvMap[M >> Shift];
1121 M = (M & (SegLen-1)) + SegLen*OutIdx;
1122 }
1123 PackedMask[I] = M;
1124 }
1125}
1126
1127bool HvxSelector::selectVectorConstants(SDNode *N) {
1128 // Constant vectors are generated as loads from constant pools or as
1129 // splats of a constant value. Since they are generated during the
1130 // selection process, the main selection algorithm is not aware of them.
1131 // Select them directly here.
1133 SetVector<SDNode*> WorkQ;
1134
1135 // The DAG can change (due to CSE) during selection, so cache all the
1136 // unselected nodes first to avoid traversing a mutating DAG.
1137 WorkQ.insert(N);
1138 for (unsigned i = 0; i != WorkQ.size(); ++i) {
1139 SDNode *W = WorkQ[i];
1140 if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1141 Nodes.push_back(W);
1142 for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1143 WorkQ.insert(W->getOperand(j).getNode());
1144 }
1145
1146 for (SDNode *L : Nodes)
1147 select(L);
1148
1149 return !Nodes.empty();
1150}
1151
1152void HvxSelector::materialize(const ResultStack &Results) {
1153 DEBUG_WITH_TYPE("isel", {
1154 dbgs() << "Materializing\n";
1155 Results.print(dbgs(), DAG);
1156 });
1157 if (Results.empty())
1158 return;
1159 const SDLoc &dl(Results.InpNode);
1160 std::vector<SDValue> Output;
1161
1162 for (unsigned I = 0, E = Results.size(); I != E; ++I) {
1163 const NodeTemplate &Node = Results[I];
1164 std::vector<SDValue> Ops;
1165 for (const OpRef &R : Node.Ops) {
1166 assert(R.isValid());
1167 if (R.isValue()) {
1168 Ops.push_back(R.OpV);
1169 continue;
1170 }
1171 if (R.OpN & OpRef::Undef) {
1172 MVT::SimpleValueType SVT = MVT::SimpleValueType(R.OpN & OpRef::Index);
1173 Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1174 continue;
1175 }
1176 // R is an index of a result.
1177 unsigned Part = R.OpN & OpRef::Whole;
1178 int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
1179 if (Idx < 0)
1180 Idx += I;
1181 assert(Idx >= 0 && unsigned(Idx) < Output.size());
1182 SDValue Op = Output[Idx];
1183 MVT OpTy = Op.getValueType().getSimpleVT();
1184 if (Part != OpRef::Whole) {
1185 assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1186 MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
1187 OpTy.getVectorNumElements()/2);
1188 unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1189 : Hexagon::vsub_hi;
1190 Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
1191 }
1192 Ops.push_back(Op);
1193 } // for (Node : Results)
1194
1195 assert(Node.Ty != MVT::Other);
1196 SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1197 ? Ops.front().getNode()
1198 : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1199 Output.push_back(SDValue(ResN, 0));
1200 }
1201
1202 SDNode *OutN = Output.back().getNode();
1203 SDNode *InpN = Results.InpNode;
1204 DEBUG_WITH_TYPE("isel", {
1205 dbgs() << "Generated node:\n";
1206 OutN->dumpr(&DAG);
1207 });
1208
1209 ISel.ReplaceNode(InpN, OutN);
1210 selectVectorConstants(OutN);
1211 DAG.RemoveDeadNodes();
1212}
1213
1214OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
1215 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1216 const SDLoc &dl(Results.InpNode);
1217 Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1218 getConst32(Hexagon::HvxWRRegClassID, dl),
1219 Lo, getConst32(Hexagon::vsub_lo, dl),
1220 Hi, getConst32(Hexagon::vsub_hi, dl),
1221 });
1222 return OpRef::res(Results.top());
1223}
1224
1225OpRef HvxSelector::funnels(OpRef Va, OpRef Vb, int Amount,
1226 ResultStack &Results) {
1227 // Do a funnel shift towards the low end (shift right) by Amount bytes.
1228 // If Amount < 0, treat it as shift left, i.e. do a shift right by
1229 // Amount + HwLen.
1230 auto VecLen = static_cast<int>(HwLen);
1231
1232 if (Amount == 0)
1233 return Va;
1234 if (Amount == VecLen)
1235 return Vb;
1236
1237 MVT Ty = getSingleVT(MVT::i8);
1238 const SDLoc &dl(Results.InpNode);
1239
1240 if (Amount < 0)
1241 Amount += VecLen;
1242 if (Amount > VecLen) {
1243 Amount -= VecLen;
1244 std::swap(Va, Vb);
1245 }
1246
1247 if (isUInt<3>(Amount)) {
1248 SDValue A = getConst32(Amount, dl);
1249 Results.push(Hexagon::V6_valignbi, Ty, {Vb, Va, A});
1250 } else if (isUInt<3>(VecLen - Amount)) {
1251 SDValue A = getConst32(VecLen - Amount, dl);
1252 Results.push(Hexagon::V6_vlalignbi, Ty, {Vb, Va, A});
1253 } else {
1254 SDValue A = getConst32(Amount, dl);
1255 Results.push(Hexagon::A2_tfrsi, Ty, {A});
1256 Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(-1)});
1257 }
1258 return OpRef::res(Results.top());
1259}
1260
1261// Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1262// pack these halves into a single vector, and remap SM into NewMask to use
1263// the new vector instead.
1264OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1265 ResultStack &Results, MutableArrayRef<int> NewMask,
1266 unsigned Options) {
1267 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1268 if (!Va.isValid() || !Vb.isValid())
1269 return OpRef::fail();
1270
1271 if (Vb.isUndef()) {
1272 llvm::copy(SM.Mask, NewMask.begin());
1273 return Va;
1274 }
1275 if (Va.isUndef()) {
1276 llvm::copy(SM.Mask, NewMask.begin());
1278 return Vb;
1279 }
1280
1281 MVT Ty = getSingleVT(MVT::i8);
1282 MVT PairTy = getPairVT(MVT::i8);
1283 OpRef Inp[2] = {Va, Vb};
1284 unsigned VecLen = SM.Mask.size();
1285
1286 auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1287 ResultStack &Results) {
1288 if (Amt == 0)
1289 return Lo;
1290 const SDLoc &dl(Results.InpNode);
1291 if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1292 bool IsRight = isUInt<3>(Amt); // Right align.
1293 SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1294 unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1295 Results.push(Opc, Ty, {Hi, Lo, S});
1296 return OpRef::res(Results.top());
1297 }
1298 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1299 OpRef A = OpRef::res(Results.top());
1300 Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1301 return OpRef::res(Results.top());
1302 };
1303
1304 // Segment is a vector half.
1305 unsigned SegLen = HwLen / 2;
1306
1307 // Check if we can shuffle vector halves around to get the used elements
1308 // into a single vector.
1309 shuffles::MaskT MaskH(SM.Mask);
1310 SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1311 unsigned SegCount = SegList.size();
1312 SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1313
1314 if (SegList.empty())
1315 return OpRef::undef(Ty);
1316
1317 // NOTE:
1318 // In the following part of the function, where the segments are rearranged,
1319 // the shuffle mask SM can be of any length that is a multiple of a vector
1320 // (i.e. a multiple of 2*SegLen), and non-zero.
1321 // The output segment map is computed, and it may have any even number of
1322 // entries, but the rearrangement of input segments will be done based only
1323 // on the first two (non-undef) entries in the segment map.
1324 // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1325 // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1326 // a single vector V = 3:1. The output mask will then be updated to use
1327 // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1328 //
1329 // Picking the segments based on the output map is an optimization. For
1330 // correctness it is only necessary that Seg0 and Seg1 are the two input
1331 // segments that are used in the output.
1332
1333 unsigned Seg0 = ~0u, Seg1 = ~0u;
1334 for (unsigned X : SegMap) {
1335 if (X == ~0u)
1336 continue;
1337 if (Seg0 == ~0u)
1338 Seg0 = X;
1339 else if (Seg1 != ~0u)
1340 break;
1341 if (X == ~1u || X != Seg0)
1342 Seg1 = X;
1343 }
1344
1345 if (SegCount == 1) {
1346 unsigned SrcOp = SegList[0] / 2;
1347 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1348 int M = SM.Mask[I];
1349 if (M >= 0) {
1350 M -= SrcOp * HwLen;
1351 assert(M >= 0);
1352 }
1353 NewMask[I] = M;
1354 }
1355 return Inp[SrcOp];
1356 }
1357
1358 if (SegCount == 2) {
1359 // Seg0 should not be undef here: this would imply a SegList
1360 // with <= 1 elements, which was checked earlier.
1361 assert(Seg0 != ~0u);
1362
1363 // If Seg0 or Seg1 are "multi-defined", pick them from the input
1364 // segment list instead.
1365 if (Seg0 == ~1u || Seg1 == ~1u) {
1366 if (Seg0 == Seg1) {
1367 Seg0 = SegList[0];
1368 Seg1 = SegList[1];
1369 } else if (Seg0 == ~1u) {
1370 Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1371 } else {
1372 assert(Seg1 == ~1u);
1373 Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1374 }
1375 }
1376 assert(Seg0 != ~1u && Seg1 != ~1u);
1377
1378 assert(Seg0 != Seg1 && "Expecting different segments");
1379 const SDLoc &dl(Results.InpNode);
1380 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1381 OpRef HL = OpRef::res(Results.top());
1382
1383 // Va = AB, Vb = CD
1384
1385 if (Seg0 / 2 == Seg1 / 2) {
1386 // Same input vector.
1387 Va = Inp[Seg0 / 2];
1388 if (Seg0 > Seg1) {
1389 // Swap halves.
1390 Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1391 Va = OpRef::res(Results.top());
1392 }
1393 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1394 } else if (Seg0 % 2 == Seg1 % 2) {
1395 // Picking AC, BD, CA, or DB.
1396 // vshuff(CD,AB,HL) -> BD:AC
1397 // vshuff(AB,CD,HL) -> DB:CA
1398 auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
1399 : std::make_pair(Va, Vb); // CA or DB
1400 Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1401 OpRef P = OpRef::res(Results.top());
1402 Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1403 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1404 } else {
1405 // Picking AD, BC, CB, or DA.
1406 if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1407 // AD or BC: this can be done using vmux.
1408 // Q = V6_pred_scalar2 SegLen
1409 // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1410 Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1411 OpRef Qt = OpRef::res(Results.top());
1412 auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
1413 : std::make_pair(Vb, Va); // CB
1414 Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1415 Va = OpRef::res(Results.top());
1416 packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1417 } else {
1418 // BC or DA: this could be done via valign by SegLen.
1419 // Do nothing here, because valign (if possible) will be generated
1420 // later on (make sure the Seg0 values are as expected).
1421 assert(Seg0 == 1 || Seg0 == 3);
1422 }
1423 }
1424 }
1425
1426 // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1427 // FIXME: maybe remove this?
1428 ShuffleMask SMH(MaskH);
1429 assert(SMH.Mask.size() == VecLen);
1430 shuffles::MaskT MaskA(SMH.Mask);
1431
1432 if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1433 // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1434 shuffles::MaskT Swapped(SMH.Mask);
1436 ShuffleMask SW(Swapped);
1437 if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1438 MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1439 std::swap(Va, Vb);
1440 }
1441 }
1442 ShuffleMask SMA(MaskA);
1443 assert(SMA.Mask.size() == VecLen);
1444
1445 if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1446 int ShiftR = SMA.MinSrc;
1447 if (ShiftR >= static_cast<int>(HwLen)) {
1448 Va = Vb;
1449 Vb = OpRef::undef(Ty);
1450 ShiftR -= HwLen;
1451 }
1452 OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1453
1454 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1455 int M = SMA.Mask[I];
1456 if (M != -1)
1457 M -= SMA.MinSrc;
1458 NewMask[I] = M;
1459 }
1460 return RetVal;
1461 }
1462
1463 // By here, packing by segment (half-vector) shuffling, and vector alignment
1464 // failed. Try vmux.
1465 // Note: since this is using the original mask, Va and Vb must not have been
1466 // modified.
1467
1468 if (Options & PackMux) {
1469 // If elements picked from Va and Vb have all different (source) indexes
1470 // (relative to the start of the argument), do a mux, and update the mask.
1471 BitVector Picked(HwLen);
1472 SmallVector<uint8_t,128> MuxBytes(HwLen);
1473 bool CanMux = true;
1474 for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1475 int M = SM.Mask[I];
1476 if (M == -1)
1477 continue;
1478 if (M >= static_cast<int>(HwLen))
1479 M -= HwLen;
1480 else
1481 MuxBytes[M] = 0xFF;
1482 if (Picked[M]) {
1483 CanMux = false;
1484 break;
1485 }
1486 NewMask[I] = M;
1487 }
1488 if (CanMux)
1489 return vmuxs(MuxBytes, Va, Vb, Results);
1490 }
1491 return OpRef::fail();
1492}
1493
1494// Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1495// pack these vectors into a pair, and remap SM into NewMask to use the
1496// new pair instead.
1497OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1498 ResultStack &Results, MutableArrayRef<int> NewMask) {
1499 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1500 SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, HwLen);
1501 if (SegList.empty())
1502 return OpRef::undef(getPairVT(MVT::i8));
1503
1504 // If more than two halves are used, bail.
1505 // TODO: be more aggressive here?
1506 unsigned SegCount = SegList.size();
1507 if (SegCount > 2)
1508 return OpRef::fail();
1509
1510 MVT HalfTy = getSingleVT(MVT::i8);
1511
1512 OpRef Inp[2] = { Va, Vb };
1513 OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1514
1515 // Really make sure we have at most 2 vectors used in the mask.
1516 assert(SegCount <= 2);
1517
1518 for (int I = 0, E = SegList.size(); I != E; ++I) {
1519 unsigned S = SegList[I];
1520 OpRef Op = Inp[S / 2];
1521 Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1522 }
1523
1524 // NOTE: Using SegList as the packing map here (not SegMap). This works,
1525 // because we're not concerned here about the order of the segments (i.e.
1526 // single vectors) in the output pair. Changing the order of vectors is
1527 // free (as opposed to changing the order of vector halves as in packs),
1528 // and so there is no extra cost added in case the order needs to be
1529 // changed later.
1530 packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1531 return concats(Out[0], Out[1], Results);
1532}
1533
1534OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1535 ResultStack &Results) {
1536 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1537 MVT ByteTy = getSingleVT(MVT::i8);
1538 MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1539 const SDLoc &dl(Results.InpNode);
1540 SDValue B = getVectorConstant(Bytes, dl);
1541 Results.push(Hexagon::V6_vd0, ByteTy, {});
1542 Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1543 Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1544 return OpRef::res(Results.top());
1545}
1546
1547OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1548 ResultStack &Results) {
1549 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1550 size_t S = Bytes.size() / 2;
1551 OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1552 OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1553 return concats(L, H, Results);
1554}
1555
1556OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1557 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1558 unsigned VecLen = SM.Mask.size();
1559 assert(HwLen == VecLen);
1560 (void)VecLen;
1561 assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1562
1563 if (isIdentity(SM.Mask))
1564 return Va;
1565 if (isUndef(SM.Mask))
1566 return OpRef::undef(getSingleVT(MVT::i8));
1567
1568 // First, check for rotations.
1569 if (auto Dist = rotationDistance(SM, VecLen)) {
1570 OpRef Rotate = funnels(Va, Va, *Dist, Results);
1571 if (Rotate.isValid())
1572 return Rotate;
1573 }
1574 unsigned HalfLen = HwLen / 2;
1575 assert(isPowerOf2_32(HalfLen));
1576
1577 // Handle special case where the output is the same half of the input
1578 // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1579 std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1580 if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1581 std::pair<int, unsigned> Strip2 =
1582 findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1583 if (Strip1 == Strip2) {
1584 const SDLoc &dl(Results.InpNode);
1585 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1586 Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1587 {Va, Va, OpRef::res(Results.top())});
1588 OpRef S = OpRef::res(Results.top());
1589 return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1590 }
1591 }
1592
1593 OpRef P = perfect(SM, Va, Results);
1594 if (P.isValid())
1595 return P;
1596 return butterfly(SM, Va, Results);
1597}
1598
1599OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1600 ResultStack &Results) {
1601 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1602 if (isUndef(SM.Mask))
1603 return OpRef::undef(getSingleVT(MVT::i8));
1604
1605 OpRef C = contracting(SM, Va, Vb, Results);
1606 if (C.isValid())
1607 return C;
1608
1609 int VecLen = SM.Mask.size();
1610 shuffles::MaskT PackedMask(VecLen);
1611 OpRef P = packs(SM, Va, Vb, Results, PackedMask);
1612 if (P.isValid())
1613 return shuffs1(ShuffleMask(PackedMask), P, Results);
1614
1615 // TODO: Before we split the mask, try perfect shuffle on concatenated
1616 // operands.
1617
1618 shuffles::MaskT MaskL(VecLen), MaskR(VecLen);
1619 splitMask(SM.Mask, MaskL, MaskR);
1620
1621 OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1622 OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1623 if (!L.isValid() || !R.isValid())
1624 return OpRef::fail();
1625
1626 SmallVector<uint8_t, 128> Bytes(VecLen);
1627 for (int I = 0; I != VecLen; ++I) {
1628 if (MaskL[I] != -1)
1629 Bytes[I] = 0xFF;
1630 }
1631 return vmuxs(Bytes, L, R, Results);
1632}
1633
1634OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1635 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1636 int VecLen = SM.Mask.size();
1637
1638 if (isIdentity(SM.Mask))
1639 return Va;
1640 if (isUndef(SM.Mask))
1641 return OpRef::undef(getPairVT(MVT::i8));
1642
1643 shuffles::MaskT PackedMask(VecLen);
1644 OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1645 if (P.isValid()) {
1646 ShuffleMask PM(PackedMask);
1647 OpRef E = expanding(PM, P, Results);
1648 if (E.isValid())
1649 return E;
1650
1651 OpRef L = shuffs1(PM.lo(), P, Results);
1652 OpRef H = shuffs1(PM.hi(), P, Results);
1653 if (L.isValid() && H.isValid())
1654 return concats(L, H, Results);
1655 }
1656
1657 if (!isLowHalfOnly(SM.Mask)) {
1658 // Doing a perfect shuffle on a low-half mask (i.e. where the upper half
1659 // is all-undef) may produce a perfect shuffle that generates legitimate
1660 // upper half. This isn't wrong, but if the perfect shuffle was possible,
1661 // then there is a good chance that a shorter (contracting) code may be
1662 // used as well (e.g. V6_vshuffeb, etc).
1663 OpRef R = perfect(SM, Va, Results);
1664 if (R.isValid())
1665 return R;
1666 // TODO commute the mask and try the opposite order of the halves.
1667 }
1668
1669 OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1670 OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1671 if (L.isValid() && H.isValid())
1672 return concats(L, H, Results);
1673
1674 return OpRef::fail();
1675}
1676
1677OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1678 ResultStack &Results) {
1679 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1680 if (isUndef(SM.Mask))
1681 return OpRef::undef(getPairVT(MVT::i8));
1682
1683 int VecLen = SM.Mask.size();
1684 SmallVector<int,256> PackedMask(VecLen);
1685 OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1686 if (P.isValid())
1687 return shuffp1(ShuffleMask(PackedMask), P, Results);
1688
1689 SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1690 splitMask(SM.Mask, MaskL, MaskR);
1691
1692 OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1693 OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1694 if (!L.isValid() || !R.isValid())
1695 return OpRef::fail();
1696
1697 // Mux the results.
1698 SmallVector<uint8_t,256> Bytes(VecLen);
1699 for (int I = 0; I != VecLen; ++I) {
1700 if (MaskL[I] != -1)
1701 Bytes[I] = 0xFF;
1702 }
1703 return vmuxp(Bytes, L, R, Results);
1704}
1705
1706namespace {
1707 struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1708 template <typename T>
1709 Deleter(SelectionDAG &D, T &C)
1710 : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1711 C.erase(N);
1712 }) {}
1713 };
1714
1715 template <typename T>
1716 struct NullifyingVector : public T {
1717 DenseMap<SDNode*, SDNode**> Refs;
1718 NullifyingVector(T &&V) : T(V) {
1719 for (unsigned i = 0, e = T::size(); i != e; ++i) {
1720 SDNode *&N = T::operator[](i);
1721 Refs[N] = &N;
1722 }
1723 }
1724 void erase(SDNode *N) {
1725 auto F = Refs.find(N);
1726 if (F != Refs.end())
1727 *F->second = nullptr;
1728 }
1729 };
1730}
1731
1732void HvxSelector::select(SDNode *ISelN) {
1733 // What's important here is to select the right set of nodes. The main
1734 // selection algorithm loops over nodes in a topological order, i.e. users
1735 // are visited before their operands.
1736 //
1737 // It is an error to have an unselected node with a selected operand, and
1738 // there is an assertion in the main selector code to enforce that.
1739 //
1740 // Such a situation could occur if we selected a node, which is both a
1741 // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1742 // node in the DAG.
1743 assert(ISelN->getOpcode() == HexagonISD::ISEL);
1744 SDNode *N0 = ISelN->getOperand(0).getNode();
1745
1746 // There could have been nodes created (i.e. inserted into the DAG)
1747 // that are now dead. Remove them, in case they use any of the nodes
1748 // to select (and make them look shared).
1749 DAG.RemoveDeadNodes();
1750
1751 SetVector<SDNode *> SubNodes;
1752
1753 if (!N0->isMachineOpcode()) {
1754 // Don't want to select N0 if it's shared with another node, except if
1755 // it's shared with other ISELs.
1756 auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1757 if (llvm::all_of(N0->users(), IsISelN))
1758 SubNodes.insert(N0);
1759 }
1760 if (SubNodes.empty()) {
1761 ISel.ReplaceNode(ISelN, N0);
1762 return;
1763 }
1764
1765 // Need to manually select the nodes that are dominated by the ISEL. Other
1766 // nodes are reachable from the rest of the DAG, and so will be selected
1767 // by the DAG selection routine.
1768 SetVector<SDNode*> Dom, NonDom;
1769 Dom.insert(N0);
1770
1771 auto IsDomRec = [&Dom, &NonDom] (SDNode *T, auto Rec) -> bool {
1772 if (Dom.count(T))
1773 return true;
1774 if (T->use_empty() || NonDom.count(T))
1775 return false;
1776 for (SDNode *U : T->users()) {
1777 // If T is reachable from a known non-dominated node, then T itself
1778 // is non-dominated.
1779 if (!Rec(U, Rec)) {
1780 NonDom.insert(T);
1781 return false;
1782 }
1783 }
1784 Dom.insert(T);
1785 return true;
1786 };
1787
1788 auto IsDom = [&IsDomRec] (SDNode *T) { return IsDomRec(T, IsDomRec); };
1789
1790 // Add the rest of nodes dominated by ISEL to SubNodes.
1791 for (unsigned I = 0; I != SubNodes.size(); ++I) {
1792 for (SDValue Op : SubNodes[I]->ops()) {
1793 SDNode *O = Op.getNode();
1794 if (IsDom(O))
1795 SubNodes.insert(O);
1796 }
1797 }
1798
1799 // Do a topological sort of nodes from Dom.
1800 SetVector<SDNode*> TmpQ;
1801
1802 std::map<SDNode *, unsigned> OpCount;
1803 for (SDNode *T : Dom) {
1804 unsigned NumDomOps = llvm::count_if(T->ops(), [&Dom](const SDUse &U) {
1805 return Dom.count(U.getNode());
1806 });
1807
1808 OpCount.insert({T, NumDomOps});
1809 if (NumDomOps == 0)
1810 TmpQ.insert(T);
1811 }
1812
1813 for (unsigned I = 0; I != TmpQ.size(); ++I) {
1814 SDNode *S = TmpQ[I];
1815 for (SDNode *U : S->users()) {
1816 if (U == ISelN)
1817 continue;
1818 auto F = OpCount.find(U);
1819 assert(F != OpCount.end());
1820 if (F->second > 0 && !--F->second)
1821 TmpQ.insert(F->first);
1822 }
1823 }
1824
1825 // Remove the marker.
1826 ISel.ReplaceNode(ISelN, N0);
1827
1828 assert(SubNodes.size() == TmpQ.size());
1829 NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1830
1831 Deleter DUQ(DAG, Queue);
1832 for (SDNode *S : reverse(Queue)) {
1833 if (S == nullptr)
1834 continue;
1835 DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1836 ISel.Select(S);
1837 }
1838}
1839
1840bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1841 MVT ResTy, SDValue Va, SDValue Vb,
1842 SDNode *N) {
1843 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1844 MVT ElemTy = ResTy.getVectorElementType();
1845 assert(ElemTy == MVT::i8);
1846 unsigned VecLen = Mask.size();
1847 bool HavePairs = (2*HwLen == VecLen);
1848 MVT SingleTy = getSingleVT(MVT::i8);
1849
1850 // The prior attempts to handle this shuffle may have left a bunch of
1851 // dead nodes in the DAG (such as constants). These nodes will be added
1852 // at the end of DAG's node list, which at that point had already been
1853 // sorted topologically. In the main selection loop, the node list is
1854 // traversed backwards from the root node, which means that any new
1855 // nodes (from the end of the list) will not be visited.
1856 // Scalarization will replace the shuffle node with the scalarized
1857 // expression, and if that expression reused any if the leftoever (dead)
1858 // nodes, these nodes would not be selected (since the "local" selection
1859 // only visits nodes that are not in AllNodes).
1860 // To avoid this issue, remove all dead nodes from the DAG now.
1861// DAG.RemoveDeadNodes();
1862
1864 LLVMContext &Ctx = *DAG.getContext();
1865 MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1866 for (int I : Mask) {
1867 if (I < 0) {
1868 Ops.push_back(ISel.selectUndef(dl, LegalTy));
1869 continue;
1870 }
1871 SDValue Vec;
1872 unsigned M = I;
1873 if (M < VecLen) {
1874 Vec = Va;
1875 } else {
1876 Vec = Vb;
1877 M -= VecLen;
1878 }
1879 if (HavePairs) {
1880 if (M < HwLen) {
1881 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1882 } else {
1883 Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1884 M -= HwLen;
1885 }
1886 }
1887 SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1888 SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1889 SDValue L = Lower.LowerOperation(Ex, DAG);
1890 assert(L.getNode());
1891 Ops.push_back(L);
1892 }
1893
1894 SDValue LV;
1895 if (2*HwLen == VecLen) {
1896 SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1897 SDValue L0 = Lower.LowerOperation(B0, DAG);
1898 SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1899 SDValue L1 = Lower.LowerOperation(B1, DAG);
1900 // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1901 // functions may expect to be called only for illegal operations, so
1902 // make sure that they are not called for legal ones. Develop a better
1903 // mechanism for dealing with this.
1904 LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1905 } else {
1906 SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1907 LV = Lower.LowerOperation(BV, DAG);
1908 }
1909
1910 assert(!N->use_empty());
1911 SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1912 ISel.ReplaceNode(N, IS.getNode());
1913 select(IS.getNode());
1914 DAG.RemoveDeadNodes();
1915 return true;
1916}
1917
1919 unsigned Width) {
1920 auto possibilities = [](ArrayRef<uint8_t> Bs, unsigned Width) -> uint32_t {
1921 unsigned Impossible = ~(1u << Width) + 1;
1922 for (unsigned I = 0, E = Bs.size(); I != E; ++I) {
1923 uint8_t B = Bs[I];
1924 if (B == 0xff)
1925 continue;
1926 if (~Impossible == 0)
1927 break;
1928 for (unsigned Log = 0; Log != Width; ++Log) {
1929 if (Impossible & (1u << Log))
1930 continue;
1931 unsigned Expected = (I >> Log) % 2;
1932 if (B != Expected)
1933 Impossible |= (1u << Log);
1934 }
1935 }
1936 return ~Impossible;
1937 };
1938
1939 SmallVector<uint32_t, 8> Worklist(Width);
1940
1941 for (unsigned BitIdx = 0; BitIdx != Width; ++BitIdx) {
1942 SmallVector<uint8_t> BitValues(SM.Mask.size());
1943 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
1944 int M = SM.Mask[i];
1945 if (M < 0)
1946 BitValues[i] = 0xff;
1947 else
1948 BitValues[i] = (M & (1u << BitIdx)) != 0;
1949 }
1950 Worklist[BitIdx] = possibilities(BitValues, Width);
1951 }
1952
1953 // If there is a word P in Worklist that matches multiple possibilities,
1954 // then if any other word Q matches any of the possibilities matched by P,
1955 // then Q matches all the possibilities matched by P. In fact, P == Q.
1956 // In other words, for each words P, Q, the sets of possibilities matched
1957 // by P and Q are either equal or disjoint (no partial overlap).
1958 //
1959 // Illustration: For 4-bit values there are 4 complete sequences:
1960 // a: 0 1 0 1 0 1 0 1 0 1 0 1 0 1 0 1
1961 // b: 0 0 1 1 0 0 1 1 0 0 1 1 0 0 1 1
1962 // c: 0 0 0 0 1 1 1 1 0 0 0 0 1 1 1 1
1963 // d: 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1
1964 //
1965 // Words containing unknown bits that match two of the complete
1966 // sequences:
1967 // ab: 0 u u 1 0 u u 1 0 u u 1 0 u u 1
1968 // ac: 0 u 0 u u 1 u 1 0 u 0 u u 1 u 1
1969 // ad: 0 u 0 u 0 u 0 u u 1 u 1 u 1 u 1
1970 // bc: 0 0 u u u u 1 1 0 0 u u u u 1 1
1971 // bd: 0 0 u u 0 0 u u u u 1 1 u u 1 1
1972 // cd: 0 0 0 0 u u u u u u u u 1 1 1 1
1973 //
1974 // Proof of the claim above:
1975 // Let P be a word that matches s0 and s1. For that to happen, all known
1976 // bits in P must match s0 and s1 exactly.
1977 // Assume there is Q that matches s1. Note that since P and Q came from
1978 // the same shuffle mask, the positions of unknown bits in P and Q match
1979 // exactly, which makes the indices of known bits be exactly the same
1980 // between P and Q. Since P matches s0 and s1, the known bits of P much
1981 // match both s0 and s1. Also, since Q matches s1, the known bits in Q
1982 // are exactly the same as in s1, which means that they are exactly the
1983 // same as in P. This implies that P == Q.
1984
1985 // There can be a situation where there are more entries with the same
1986 // bits set than there are set bits (e.g. value 9 occurring more than 2
1987 // times). In such cases it will be impossible to complete this to a
1988 // perfect shuffle.
1989 SmallVector<uint32_t, 8> Sorted(Worklist);
1990 llvm::sort(Sorted);
1991
1992 for (unsigned I = 0, E = Sorted.size(); I != E;) {
1993 unsigned P = Sorted[I], Count = 1;
1994 while (++I != E && P == Sorted[I])
1995 ++Count;
1996 if ((unsigned)llvm::popcount(P) < Count) {
1997 // Reset all occurrences of P, if there are more occurrences of P
1998 // than there are bits in P.
1999 llvm::replace(Worklist, P, 0U);
2000 }
2001 }
2002
2003 return Worklist;
2004}
2005
2008 // Pick a completion if there are multiple possibilities. For now just
2009 // select any valid completion.
2010 SmallVector<uint32_t, 8> Comps(Completions);
2011
2012 for (unsigned I = 0; I != Width; ++I) {
2013 uint32_t P = Comps[I];
2014 assert(P != 0);
2015 if (isPowerOf2_32(P))
2016 continue;
2017 // T = least significant bit of P.
2018 uint32_t T = P ^ ((P - 1) & P);
2019 // Clear T in all remaining words matching P.
2020 for (unsigned J = I + 1; J != Width; ++J) {
2021 if (Comps[J] == P)
2022 Comps[J] ^= T;
2023 }
2024 Comps[I] = T;
2025 }
2026
2027#ifndef NDEBUG
2028 // Check that we have generated a valid completion.
2029 uint32_t OrAll = 0;
2030 for (uint32_t C : Comps) {
2032 OrAll |= C;
2033 }
2034 assert(OrAll == (1u << Width) -1);
2035#endif
2036
2037 return Comps;
2038}
2039
2040std::optional<int> HvxSelector::rotationDistance(ShuffleMask SM,
2041 unsigned WrapAt) {
2042 std::optional<int> Dist;
2043 for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
2044 int M = SM.Mask[I];
2045 if (M < 0)
2046 continue;
2047 if (Dist) {
2048 if ((I + *Dist) % static_cast<int>(WrapAt) != M)
2049 return std::nullopt;
2050 } else {
2051 // Integer a%b operator assumes rounding towards zero by /, so it
2052 // "misbehaves" when a crosses 0 (the remainder also changes sign).
2053 // Add WrapAt in an attempt to keep I+Dist non-negative.
2054 Dist = M - I;
2055 if (Dist < 0)
2056 Dist = *Dist + WrapAt;
2057 }
2058 }
2059 return Dist;
2060}
2061
2062OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
2063 ResultStack &Results) {
2064 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2065 if (!Va.isValid() || !Vb.isValid())
2066 return OpRef::fail();
2067
2068 // Contracting shuffles, i.e. instructions that always discard some bytes
2069 // from the operand vectors.
2070 //
2071 // Funnel shifts
2072 // V6_vshuff{e,o}b
2073 // V6_vshuf{e,o}h
2074 // V6_vdealb4w
2075 // V6_vpack{e,o}{b,h}
2076
2077 int VecLen = SM.Mask.size();
2078
2079 // First, check for funnel shifts.
2080 if (auto Dist = rotationDistance(SM, 2 * VecLen)) {
2081 OpRef Funnel = funnels(Va, Vb, *Dist, Results);
2082 if (Funnel.isValid())
2083 return Funnel;
2084 }
2085
2086 MVT SingleTy = getSingleVT(MVT::i8);
2087 MVT PairTy = getPairVT(MVT::i8);
2088
2089 auto same = [](ArrayRef<int> Mask1, ArrayRef<int> Mask2) -> bool {
2090 return Mask1 == Mask2;
2091 };
2092
2093 using PackConfig = std::pair<unsigned, bool>;
2094 PackConfig Packs[] = {
2095 {1, false}, // byte, even
2096 {1, true}, // byte, odd
2097 {2, false}, // half, even
2098 {2, true}, // half, odd
2099 };
2100
2101 { // Check vpack
2102 unsigned Opcodes[] = {
2103 Hexagon::V6_vpackeb,
2104 Hexagon::V6_vpackob,
2105 Hexagon::V6_vpackeh,
2106 Hexagon::V6_vpackoh,
2107 };
2108 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2109 auto [Size, Odd] = Packs[i];
2110 if (same(SM.Mask, shuffles::mask(shuffles::vpack, HwLen, Size, Odd))) {
2111 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2112 return OpRef::res(Results.top());
2113 }
2114 }
2115 }
2116
2117 { // Check vshuff
2118 unsigned Opcodes[] = {
2119 Hexagon::V6_vshuffeb,
2120 Hexagon::V6_vshuffob,
2121 Hexagon::V6_vshufeh,
2122 Hexagon::V6_vshufoh,
2123 };
2124 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2125 auto [Size, Odd] = Packs[i];
2126 if (same(SM.Mask, shuffles::mask(shuffles::vshuff, HwLen, Size, Odd))) {
2127 Results.push(Opcodes[i], SingleTy, {Vb, Va});
2128 return OpRef::res(Results.top());
2129 }
2130 }
2131 }
2132
2133 { // Check vdeal
2134 // There is no "V6_vdealeb", etc, but the supposed behavior of vdealeb
2135 // is equivalent to "(V6_vpackeb (V6_vdealvdd Vu, Vv, -2))". Other such
2136 // variants of "deal" can be done similarly.
2137 unsigned Opcodes[] = {
2138 Hexagon::V6_vpackeb,
2139 Hexagon::V6_vpackob,
2140 Hexagon::V6_vpackeh,
2141 Hexagon::V6_vpackoh,
2142 };
2143 const SDLoc &dl(Results.InpNode);
2144
2145 for (int i = 0, e = std::size(Opcodes); i != e; ++i) {
2146 auto [Size, Odd] = Packs[i];
2147 if (same(SM.Mask, shuffles::mask(shuffles::vdeal, HwLen, Size, Odd))) {
2148 Results.push(Hexagon::A2_tfrsi, MVT::i32,
2149 {getSignedConst32(-2 * Size, dl)});
2150 Results.push(Hexagon::V6_vdealvdd, PairTy, {Vb, Va, OpRef::res(-1)});
2151 auto vdeal = OpRef::res(Results.top());
2152 Results.push(Opcodes[i], SingleTy,
2153 {OpRef::hi(vdeal), OpRef::lo(vdeal)});
2154 return OpRef::res(Results.top());
2155 }
2156 }
2157 }
2158
2159 if (same(SM.Mask, shuffles::mask(shuffles::vdealb4w, HwLen))) {
2160 Results.push(Hexagon::V6_vdealb4w, SingleTy, {Vb, Va});
2161 return OpRef::res(Results.top());
2162 }
2163
2164 return OpRef::fail();
2165}
2166
2167OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2168 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2169 // Expanding shuffles (using all elements and inserting into larger vector):
2170 //
2171 // V6_vunpacku{b,h} [*]
2172 //
2173 // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
2174 //
2175 // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
2176 // they are not shuffles.
2177 //
2178 // The argument is a single vector.
2179
2180 int VecLen = SM.Mask.size();
2181 assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
2182
2183 std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
2184
2185 // The patterns for the unpacks, in terms of the starting offsets of the
2186 // consecutive strips (L = length of the strip, N = VecLen):
2187 //
2188 // vunpacku: 0, -1, L, -1, 2L, -1 ...
2189
2190 if (Strip.first != 0)
2191 return OpRef::fail();
2192
2193 // The vunpackus only handle byte and half-word.
2194 if (Strip.second != 1 && Strip.second != 2)
2195 return OpRef::fail();
2196
2197 int N = VecLen;
2198 int L = Strip.second;
2199
2200 // First, check the non-ignored strips.
2201 for (int I = 2*L; I < N; I += 2*L) {
2202 auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
2203 if (S.second != unsigned(L))
2204 return OpRef::fail();
2205 if (2*S.first != I)
2206 return OpRef::fail();
2207 }
2208 // Check the -1s.
2209 for (int I = L; I < N; I += 2*L) {
2210 auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
2211 if (S.first != -1 || S.second != unsigned(L))
2212 return OpRef::fail();
2213 }
2214
2215 unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
2216 : Hexagon::V6_vunpackuh;
2217 Results.push(Opc, getPairVT(MVT::i8), {Va});
2218 return OpRef::res(Results.top());
2219}
2220
2221OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2222 DEBUG_WITH_TYPE("isel", { dbgs() << __func__ << '\n'; });
2223 // V6_vdeal{b,h}
2224 // V6_vshuff{b,h}
2225
2226 // V6_vshufoe{b,h} those are equivalent to vshuffvdd(..,{1,2})
2227 // V6_vshuffvdd (V6_vshuff)
2228 // V6_dealvdd (V6_vdeal)
2229
2230 int VecLen = SM.Mask.size();
2231 assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
2232 unsigned LogLen = Log2_32(VecLen);
2233 unsigned HwLog = Log2_32(HwLen);
2234 // The result length must be the same as the length of a single vector,
2235 // or a vector pair.
2236 assert(LogLen == HwLog || LogLen == HwLog + 1);
2237 bool HavePairs = LogLen == HwLog + 1;
2238
2239 SmallVector<unsigned, 8> Perm(LogLen);
2240
2241 // Check if this could be a perfect shuffle, or a combination of perfect
2242 // shuffles.
2243 //
2244 // Consider this permutation (using hex digits to make the ASCII diagrams
2245 // easier to read):
2246 // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
2247 // This is a "deal" operation: divide the input into two halves, and
2248 // create the output by picking elements by alternating between these two
2249 // halves:
2250 // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
2251 // 8 9 A B C D E F
2252 //
2253 // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
2254 // a somwehat different mechanism that could be used to perform shuffle/
2255 // deal operations: a 2x2 transpose.
2256 // Consider the halves of inputs again, they can be interpreted as a 2x8
2257 // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
2258 // together. Now, when considering 2 elements at a time, it will be a 2x4
2259 // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
2260 // 01 23 45 67
2261 // 89 AB CD EF
2262 // With groups of 4, this will become a single 2x2 matrix, and so on.
2263 //
2264 // The 2x2 transpose instruction works by transposing each of the 2x2
2265 // matrices (or "sub-matrices"), given a specific group size. For example,
2266 // if the group size is 1 (i.e. each element is its own group), there
2267 // will be four transposes of the four 2x2 matrices that form the 2x8.
2268 // For example, with the inputs as above, the result will be:
2269 // 0 8 2 A 4 C 6 E
2270 // 1 9 3 B 5 D 7 F
2271 // Now, this result can be transposed again, but with the group size of 2:
2272 // 08 19 4C 5D
2273 // 2A 3B 6E 7F
2274 // If we then transpose that result, but with the group size of 4, we get:
2275 // 0819 2A3B
2276 // 4C5D 6E7F
2277 // If we concatenate these two rows, it will be
2278 // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
2279 // which is the same as the "deal" [*] above.
2280 //
2281 // In general, a "deal" of individual elements is a series of 2x2 transposes,
2282 // with changing group size. HVX has two instructions:
2283 // Vdd = V6_vdealvdd Vu, Vv, Rt
2284 // Vdd = V6_shufvdd Vu, Vv, Rt
2285 // that perform exactly that. The register Rt controls which transposes are
2286 // going to happen: a bit at position n (counting from 0) indicates that a
2287 // transpose with a group size of 2^n will take place. If multiple bits are
2288 // set, multiple transposes will happen: vdealvdd will perform them starting
2289 // with the largest group size, vshuffvdd will do them in the reverse order.
2290 //
2291 // The main observation is that each 2x2 transpose corresponds to swapping
2292 // columns of bits in the binary representation of the values.
2293 //
2294 // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
2295 // in a given column. The * denote the columns that will be swapped.
2296 // The transpose with the group size 2^n corresponds to swapping columns
2297 // 3 (the highest log) and log2(n):
2298 //
2299 // 3 2 1 0 0 2 1 3 0 2 3 1
2300 // * * * * * *
2301 // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
2302 // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
2303 // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
2304 // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
2305 // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
2306 // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
2307 // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
2308 // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
2309 // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
2310 // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
2311 // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
2312 // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
2313 // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
2314 // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
2315 // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
2316 // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
2317
2318 // There is one special case that is not a perfect shuffle, but can be
2319 // turned into one easily: when the shuffle operates on a vector pair,
2320 // but the two vectors in the pair are swapped. The code that identifies
2321 // perfect shuffles will reject it, unless the order is reversed.
2322 shuffles::MaskT MaskStorage(SM.Mask);
2323 bool InvertedPair = false;
2324 if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2325 for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2326 int M = SM.Mask[i];
2327 MaskStorage[i] = M >= int(HwLen) ? M - HwLen : M + HwLen;
2328 }
2329 InvertedPair = true;
2330 SM = ShuffleMask(MaskStorage);
2331 }
2332
2333 auto Comps = getPerfectCompletions(SM, LogLen);
2334 if (llvm::is_contained(Comps, 0))
2335 return OpRef::fail();
2336
2337 auto Pick = completeToPerfect(Comps, LogLen);
2338 for (unsigned I = 0; I != LogLen; ++I)
2339 Perm[I] = Log2_32(Pick[I]);
2340
2341 // Once we have Perm, represent it as cycles. Denote the maximum log2
2342 // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2343 // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2344 // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2345 // order being from left to right. Any (contiguous) segment where the
2346 // values ai, ai+1...aj are either all increasing or all decreasing,
2347 // can be implemented via a single vshuffvdd/vdealvdd respectively.
2348 //
2349 // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2350 // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2351 // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2352 // can be used to generate a sequence of vshuffvdd/vdealvdd.
2353 //
2354 // Example:
2355 // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2356 // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2357 // (4 0 1)(4 0)(4 2 3)(4 2).
2358 // It can then be expanded into swaps as
2359 // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2360 // and broken up into "increasing" segments as
2361 // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2362 // This is equivalent to
2363 // (4 0 1)(4 0 2 3)(4 2),
2364 // which can be implemented as 3 vshufvdd instructions.
2365
2366 using CycleType = SmallVector<unsigned, 8>;
2367 std::set<CycleType> Cycles;
2368 std::set<unsigned> All;
2369
2370 for (unsigned I : Perm)
2371 All.insert(I);
2372
2373 // If the cycle contains LogLen-1, move it to the front of the cycle.
2374 // Otherwise, return the cycle unchanged.
2375 auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2376 unsigned LogPos, N = C.size();
2377 for (LogPos = 0; LogPos != N; ++LogPos)
2378 if (C[LogPos] == LogLen - 1)
2379 break;
2380 if (LogPos == N)
2381 return C;
2382
2383 CycleType NewC(C.begin() + LogPos, C.end());
2384 NewC.append(C.begin(), C.begin() + LogPos);
2385 return NewC;
2386 };
2387
2388 auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
2389 // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2390 // for bytes zero is included, for halfwords is not.
2391 if (Cs.size() != 1)
2392 return 0u;
2393 const CycleType &C = *Cs.begin();
2394 if (C[0] != Len - 1)
2395 return 0u;
2396 int D = Len - C.size();
2397 if (D != 0 && D != 1)
2398 return 0u;
2399
2400 bool IsDeal = true, IsShuff = true;
2401 for (unsigned I = 1; I != Len - D; ++I) {
2402 if (C[I] != Len - 1 - I)
2403 IsDeal = false;
2404 if (C[I] != I - (1 - D)) // I-1, I
2405 IsShuff = false;
2406 }
2407 // At most one, IsDeal or IsShuff, can be non-zero.
2408 assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2409 static unsigned Deals[] = {Hexagon::V6_vdealb, Hexagon::V6_vdealh};
2410 static unsigned Shufs[] = {Hexagon::V6_vshuffb, Hexagon::V6_vshuffh};
2411 return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
2412 };
2413
2414 while (!All.empty()) {
2415 unsigned A = *All.begin();
2416 All.erase(A);
2417 CycleType C;
2418 C.push_back(A);
2419 for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
2420 C.push_back(B);
2421 All.erase(B);
2422 }
2423 if (C.size() <= 1)
2424 continue;
2425 Cycles.insert(canonicalize(C));
2426 }
2427
2428 MVT SingleTy = getSingleVT(MVT::i8);
2429 MVT PairTy = getPairVT(MVT::i8);
2430
2431 // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2432 if (unsigned(VecLen) == HwLen) {
2433 if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
2434 Results.push(SingleOpc, SingleTy, {Va});
2435 return OpRef::res(Results.top());
2436 }
2437 }
2438
2439 // From the cycles, construct the sequence of values that will
2440 // then form the control values for vdealvdd/vshuffvdd, i.e.
2441 // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2442 // This essentially strips the M value from the cycles where
2443 // it's present, and performs the insertion of M (then stripping)
2444 // for cycles without M (as described in an earlier comment).
2445 SmallVector<unsigned, 8> SwapElems;
2446 // When the input is extended (i.e. single vector becomes a pair),
2447 // this is done by using an "undef" vector as the second input.
2448 // However, then we get
2449 // input 1: GOODBITS
2450 // input 2: ........
2451 // but we need
2452 // input 1: ....BITS
2453 // input 2: ....GOOD
2454 // Then at the end, this needs to be undone. To accomplish this,
2455 // artificially add "LogLen-1" at both ends of the sequence.
2456 if (!HavePairs)
2457 SwapElems.push_back(LogLen - 1);
2458 for (const CycleType &C : Cycles) {
2459 // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2460 unsigned First = (C[0] == LogLen - 1) ? 1 : 0;
2461 SwapElems.append(C.begin() + First, C.end());
2462 if (First == 0)
2463 SwapElems.push_back(C[0]);
2464 }
2465 if (!HavePairs)
2466 SwapElems.push_back(LogLen - 1);
2467
2468 const SDLoc &dl(Results.InpNode);
2469 OpRef Arg = HavePairs ? Va : concats(Va, OpRef::undef(SingleTy), Results);
2470 if (InvertedPair)
2471 Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
2472
2473 for (unsigned I = 0, E = SwapElems.size(); I != E;) {
2474 bool IsInc = I == E - 1 || SwapElems[I] < SwapElems[I + 1];
2475 unsigned S = (1u << SwapElems[I]);
2476 if (I < E - 1) {
2477 while (++I < E - 1 && IsInc == (SwapElems[I] < SwapElems[I + 1]))
2478 S |= 1u << SwapElems[I];
2479 // The above loop will not add a bit for the final SwapElems[I+1],
2480 // so add it here.
2481 S |= 1u << SwapElems[I];
2482 }
2483 ++I;
2484
2485 // Upper bits of the vdeal/vshuff parameter that do not cover any byte in
2486 // the vector are ignored. Technically, A2_tfrsi takes a signed value, which
2487 // is sign-extended to 32 bit if there is no extender. The practical
2488 // advantages are that signed values are smaller in common use cases and are
2489 // not sensitive to the vector size.
2490 int SS = SignExtend32(S, HwLog);
2491
2492 NodeTemplate Res;
2493 Results.push(Hexagon::A2_tfrsi, MVT::i32, {getSignedConst32(SS, dl)});
2494 Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2495 Res.Ty = PairTy;
2496 Res.Ops = {OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1)};
2497 Results.push(Res);
2498 Arg = OpRef::res(Results.top());
2499 }
2500
2501 return HavePairs ? Arg : OpRef::lo(Arg);
2502}
2503
2504OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2505 DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2506 // Butterfly shuffles.
2507 //
2508 // V6_vdelta
2509 // V6_vrdelta
2510 // V6_vror
2511
2512 // The assumption here is that all elements picked by Mask are in the
2513 // first operand to the vector_shuffle. This assumption is enforced
2514 // by the caller.
2515
2516 MVT ResTy = getSingleVT(MVT::i8);
2517 PermNetwork::Controls FC, RC;
2518 const SDLoc &dl(Results.InpNode);
2519 int VecLen = SM.Mask.size();
2520
2521 for (int M : SM.Mask) {
2522 if (M != -1 && M >= VecLen)
2523 return OpRef::fail();
2524 }
2525
2526 // Try the deltas/benes for both single vectors and vector pairs.
2527 ForwardDeltaNetwork FN(SM.Mask);
2528 if (FN.run(FC)) {
2529 SDValue Ctl = getVectorConstant(FC, dl);
2530 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2531 return OpRef::res(Results.top());
2532 }
2533
2534 // Try reverse delta.
2535 ReverseDeltaNetwork RN(SM.Mask);
2536 if (RN.run(RC)) {
2537 SDValue Ctl = getVectorConstant(RC, dl);
2538 Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2539 return OpRef::res(Results.top());
2540 }
2541
2542 // Do Benes.
2543 BenesNetwork BN(SM.Mask);
2544 if (BN.run(FC, RC)) {
2545 SDValue CtlF = getVectorConstant(FC, dl);
2546 SDValue CtlR = getVectorConstant(RC, dl);
2547 Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2548 Results.push(Hexagon::V6_vrdelta, ResTy,
2549 {OpRef::res(-1), OpRef(CtlR)});
2550 return OpRef::res(Results.top());
2551 }
2552
2553 return OpRef::fail();
2554}
2555
2556SDValue HvxSelector::getConst32(unsigned Val, const SDLoc &dl) {
2557 return DAG.getTargetConstant(Val, dl, MVT::i32);
2558}
2559
2560SDValue HvxSelector::getSignedConst32(int Val, const SDLoc &dl) {
2561 return DAG.getSignedTargetConstant(Val, dl, MVT::i32);
2562}
2563
2564SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2565 const SDLoc &dl) {
2567 for (uint8_t C : Data)
2568 Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2569 MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2570 SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2571 SDValue LV = Lower.LowerOperation(BV, DAG);
2572 DAG.RemoveDeadNode(BV.getNode());
2573 return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2574}
2575
2577 SDValue Inp = N->getOperand(0);
2578 MVT ResTy = N->getValueType(0).getSimpleVT();
2579 unsigned Idx = N->getConstantOperandVal(1);
2580
2581 [[maybe_unused]] MVT InpTy = Inp.getValueType().getSimpleVT();
2582 [[maybe_unused]] unsigned ResLen = ResTy.getVectorNumElements();
2584 assert(2 * ResLen == InpTy.getVectorNumElements());
2585 assert(Idx == 0 || Idx == ResLen);
2586
2587 unsigned SubReg = Idx == 0 ? Hexagon::vsub_lo : Hexagon::vsub_hi;
2588 SDValue Ext = DAG.getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
2589
2590 ISel.ReplaceNode(N, Ext.getNode());
2591}
2592
2594 DEBUG_WITH_TYPE("isel", {
2595 dbgs() << "Starting " << __func__ << " on node:\n";
2596 N->dump(&DAG);
2597 });
2598 MVT ResTy = N->getValueType(0).getSimpleVT();
2599 // Assume that vector shuffles operate on vectors of bytes.
2600 assert(ResTy.isVectorOf(MVT::i8));
2601
2602 auto *SN = cast<ShuffleVectorSDNode>(N);
2603 std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2604 // This shouldn't really be necessary. Is it?
2605 for (int &Idx : Mask)
2606 if (Idx != -1 && Idx < 0)
2607 Idx = -1;
2608
2609 unsigned VecLen = Mask.size();
2610 bool HavePairs = (2*HwLen == VecLen);
2611 assert(ResTy.getSizeInBits() / 8 == VecLen);
2612
2613 // Vd = vector_shuffle Va, Vb, Mask
2614 //
2615
2616 bool UseLeft = false, UseRight = false;
2617 for (unsigned I = 0; I != VecLen; ++I) {
2618 if (Mask[I] == -1)
2619 continue;
2620 unsigned Idx = Mask[I];
2621 assert(Idx < 2*VecLen);
2622 if (Idx < VecLen)
2623 UseLeft = true;
2624 else
2625 UseRight = true;
2626 }
2627
2628 DEBUG_WITH_TYPE("isel", {
2629 dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2630 << UseLeft << " UseRight=" << UseRight << " HavePairs="
2631 << HavePairs << '\n';
2632 });
2633 // If the mask is all -1's, generate "undef".
2634 if (!UseLeft && !UseRight) {
2635 ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2636 return;
2637 }
2638
2639 SDValue Vec0 = N->getOperand(0);
2640 SDValue Vec1 = N->getOperand(1);
2641 assert(Vec0.getValueType() == ResTy && Vec1.getValueType() == ResTy);
2642
2643 ResultStack Results(SN);
2644 OpRef Va = OpRef::undef(ResTy);
2645 OpRef Vb = OpRef::undef(ResTy);
2646
2647 if (!Vec0.isUndef()) {
2648 Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2649 Va = OpRef::OpRef::res(Results.top());
2650 }
2651 if (!Vec1.isUndef()) {
2652 Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2653 Vb = OpRef::res(Results.top());
2654 }
2655
2656 OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2657 : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2658
2659 bool Done = Res.isValid();
2660 if (Done) {
2661 // Make sure that Res is on the stack before materializing.
2662 Results.push(TargetOpcode::COPY, ResTy, {Res});
2663 materialize(Results);
2664 } else {
2665 Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2666 }
2667
2668 if (!Done) {
2669#ifndef NDEBUG
2670 dbgs() << "Unhandled shuffle:\n";
2671 SN->dumpr(&DAG);
2672#endif
2673 llvm_unreachable("Failed to select vector shuffle");
2674 }
2675}
2676
2678 // If this is a rotation by less than 8, use V6_valignbi.
2679 MVT Ty = N->getValueType(0).getSimpleVT();
2680 const SDLoc &dl(N);
2681 SDValue VecV = N->getOperand(0);
2682 SDValue RotV = N->getOperand(1);
2683 SDNode *NewN = nullptr;
2684
2685 if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2686 unsigned S = CN->getZExtValue() % HST.getVectorLength();
2687 if (S == 0) {
2688 NewN = VecV.getNode();
2689 } else if (isUInt<3>(S)) {
2690 NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2691 {VecV, VecV, getConst32(S, dl)});
2692 }
2693 }
2694
2695 if (!NewN)
2696 NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2697
2698 ISel.ReplaceNode(N, NewN);
2699}
2700
2702 SDValue Vv = N->getOperand(0);
2703 SDValue Vu = N->getOperand(1);
2704 SDValue Rt = N->getOperand(2);
2705 SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2706 N->getValueType(0), {Vv, Vu, Rt});
2707 ISel.ReplaceNode(N, NewN);
2708 DAG.RemoveDeadNode(N);
2709}
2710
2711void HexagonDAGToDAGISel::PreprocessHvxISelDAG() {
2712 auto getNodes = [this]() -> std::vector<SDNode *> {
2713 std::vector<SDNode *> T;
2714 T.reserve(CurDAG->allnodes_size());
2715 for (SDNode &N : CurDAG->allnodes())
2716 T.push_back(&N);
2717 return T;
2718 };
2719
2720 ppHvxShuffleOfShuffle(getNodes());
2721}
2722
2723template <> struct std::hash<SDValue> {
2724 std::size_t operator()(SDValue V) const {
2725 return std::hash<const void *>()(V.getNode()) +
2726 std::hash<unsigned>()(V.getResNo());
2727 };
2728};
2729
2730void HexagonDAGToDAGISel::ppHvxShuffleOfShuffle(std::vector<SDNode *> &&Nodes) {
2731 // Motivating case:
2732 // t10: v64i32 = ...
2733 // t46: v128i8 = vector_shuffle<...> t44, t45
2734 // t48: v128i8 = vector_shuffle<...> t44, t45
2735 // t42: v128i8 = vector_shuffle<...> t46, t48
2736 // t12: v32i32 = extract_subvector t10, Constant:i32<0>
2737 // t44: v128i8 = bitcast t12
2738 // t15: v32i32 = extract_subvector t10, Constant:i32<32>
2739 // t45: v128i8 = bitcast t15
2740 SelectionDAG &DAG = *CurDAG;
2741 unsigned HwLen = HST->getVectorLength();
2742
2743 struct SubVectorInfo {
2744 SubVectorInfo(SDValue S, unsigned H) : Src(S), HalfIdx(H) {}
2745 SDValue Src;
2746 unsigned HalfIdx;
2747 };
2748
2749 using MapType = DenseMap<SDValue, unsigned>;
2750
2751 auto getMaskElt = [&](unsigned Idx, ShuffleVectorSDNode *Shuff0,
2752 ShuffleVectorSDNode *Shuff1,
2753 const MapType &OpMap) -> int {
2754 // Treat Shuff0 and Shuff1 as operands to another vector shuffle, and
2755 // Idx as a (non-undef) element of the top level shuffle's mask, that
2756 // is, index into concat(Shuff0, Shuff1).
2757 // Assuming that Shuff0 and Shuff1 both operate on subvectors of the
2758 // same source vector (as described by OpMap), return the index of
2759 // that source vector corresponding to Idx.
2760 ShuffleVectorSDNode *OpShuff = Idx < HwLen ? Shuff0 : Shuff1;
2761 if (Idx >= HwLen)
2762 Idx -= HwLen;
2763
2764 // Get the mask index that M points at in the corresponding operand.
2765 int MaybeN = OpShuff->getMaskElt(Idx);
2766 if (MaybeN < 0)
2767 return -1;
2768
2769 auto N = static_cast<unsigned>(MaybeN);
2770 unsigned SrcBase = N < HwLen ? OpMap.at(OpShuff->getOperand(0))
2771 : OpMap.at(OpShuff->getOperand(1));
2772 if (N >= HwLen)
2773 N -= HwLen;
2774
2775 return N + SrcBase;
2776 };
2777
2778 auto fold3 = [&](SDValue TopShuff, SDValue Inp, MapType &&OpMap) -> SDValue {
2779 // Fold all 3 shuffles into a single one.
2780 auto *This = cast<ShuffleVectorSDNode>(TopShuff);
2781 auto *S0 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(0));
2782 auto *S1 = cast<ShuffleVectorSDNode>(TopShuff.getOperand(1));
2783 ArrayRef<int> TopMask = This->getMask();
2784 // This should be guaranteed by type checks in the caller, and the fact
2785 // that all shuffles should have been promoted to operate on MVT::i8.
2786 assert(TopMask.size() == S0->getMask().size() &&
2787 TopMask.size() == S1->getMask().size());
2788 assert(TopMask.size() == HwLen);
2789
2790 SmallVector<int, 256> FoldedMask(2 * HwLen);
2791 for (unsigned I = 0; I != HwLen; ++I) {
2792 int MaybeM = TopMask[I];
2793 if (MaybeM >= 0) {
2794 FoldedMask[I] =
2795 getMaskElt(static_cast<unsigned>(MaybeM), S0, S1, OpMap);
2796 } else {
2797 FoldedMask[I] = -1;
2798 }
2799 }
2800 // The second half of the result will be all-undef.
2801 std::fill(FoldedMask.begin() + HwLen, FoldedMask.end(), -1);
2802
2803 // Return
2804 // FoldedShuffle = (Shuffle Inp, undef, FoldedMask)
2805 // (LoHalf FoldedShuffle)
2806 const SDLoc &dl(TopShuff);
2807 MVT SingleTy = MVT::getVectorVT(MVT::i8, HwLen);
2808 MVT PairTy = MVT::getVectorVT(MVT::i8, 2 * HwLen);
2809 SDValue FoldedShuff =
2810 DAG.getVectorShuffle(PairTy, dl, DAG.getBitcast(PairTy, Inp),
2811 DAG.getUNDEF(PairTy), FoldedMask);
2812 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SingleTy, FoldedShuff,
2813 DAG.getConstant(0, dl, MVT::i32));
2814 };
2815
2816 auto getSourceInfo = [](SDValue V) -> std::optional<SubVectorInfo> {
2817 while (V.getOpcode() == ISD::BITCAST)
2818 V = V.getOperand(0);
2819 if (V.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2820 return std::nullopt;
2821 return SubVectorInfo(V.getOperand(0),
2822 !cast<ConstantSDNode>(V.getOperand(1))->isZero());
2823 };
2824
2825 for (SDNode *N : Nodes) {
2826 if (N->getOpcode() != ISD::VECTOR_SHUFFLE)
2827 continue;
2828 EVT ResTy = N->getValueType(0);
2829 if (ResTy.getVectorElementType() != MVT::i8)
2830 continue;
2831 if (ResTy.getVectorNumElements() != HwLen)
2832 continue;
2833
2834 SDValue V0 = N->getOperand(0);
2835 SDValue V1 = N->getOperand(1);
2836 if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
2837 continue;
2838 if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
2839 continue;
2840 if (V0.getValueType() != ResTy || V1.getValueType() != ResTy)
2841 continue;
2842
2843 // Check if all operands of the two operand shuffles are extract_subvectors
2844 // from the same vector pair.
2845 auto V0A = getSourceInfo(V0.getOperand(0));
2846 if (!V0A.has_value())
2847 continue;
2848 auto V0B = getSourceInfo(V0.getOperand(1));
2849 if (!V0B.has_value() || V0B->Src != V0A->Src)
2850 continue;
2851 auto V1A = getSourceInfo(V1.getOperand(0));
2852 if (!V1A.has_value() || V1A->Src != V0A->Src)
2853 continue;
2854 auto V1B = getSourceInfo(V1.getOperand(1));
2855 if (!V1B.has_value() || V1B->Src != V0A->Src)
2856 continue;
2857
2858 // The source must be a pair. This should be guaranteed here,
2859 // but check just in case.
2860 assert(V0A->Src.getValueType().getSizeInBits() == 16 * HwLen);
2861
2862 MapType OpMap = {
2863 {V0.getOperand(0), V0A->HalfIdx * HwLen},
2864 {V0.getOperand(1), V0B->HalfIdx * HwLen},
2865 {V1.getOperand(0), V1A->HalfIdx * HwLen},
2866 {V1.getOperand(1), V1B->HalfIdx * HwLen},
2867 };
2868 SDValue NewS = fold3(SDValue(N, 0), V0A->Src, std::move(OpMap));
2869 ReplaceNode(N, NewS.getNode());
2870 }
2871}
2872
2873void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode *N) {
2874 HvxSelector(*this, *CurDAG).selectExtractSubvector(N);
2875}
2876
2877void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2878 HvxSelector(*this, *CurDAG).selectShuffle(N);
2879}
2880
2881void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2882 HvxSelector(*this, *CurDAG).selectRor(N);
2883}
2884
2885void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2886 HvxSelector(*this, *CurDAG).selectVAlign(N);
2887}
2888
2890 const SDLoc &dl(N);
2891 SDValue Chain = N->getOperand(0);
2892 SDValue Address = N->getOperand(2);
2893 SDValue Predicate = N->getOperand(3);
2894 SDValue Base = N->getOperand(4);
2895 SDValue Modifier = N->getOperand(5);
2896 SDValue Offset = N->getOperand(6);
2897 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2898
2899 unsigned Opcode;
2900 unsigned IntNo = N->getConstantOperandVal(1);
2901 switch (IntNo) {
2902 default:
2903 llvm_unreachable("Unexpected HVX gather intrinsic.");
2904 case Intrinsic::hexagon_V6_vgathermhq:
2905 case Intrinsic::hexagon_V6_vgathermhq_128B:
2906 Opcode = Hexagon::V6_vgathermhq_pseudo;
2907 break;
2908 case Intrinsic::hexagon_V6_vgathermwq:
2909 case Intrinsic::hexagon_V6_vgathermwq_128B:
2910 Opcode = Hexagon::V6_vgathermwq_pseudo;
2911 break;
2912 case Intrinsic::hexagon_V6_vgathermhwq:
2913 case Intrinsic::hexagon_V6_vgathermhwq_128B:
2914 Opcode = Hexagon::V6_vgathermhwq_pseudo;
2915 break;
2916 }
2917
2918 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2919 SDValue Ops[] = { Address, ImmOperand,
2920 Predicate, Base, Modifier, Offset, Chain };
2921 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2922
2924 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2925
2926 ReplaceNode(N, Result);
2927}
2928
2930 const SDLoc &dl(N);
2931 SDValue Chain = N->getOperand(0);
2932 SDValue Address = N->getOperand(2);
2933 SDValue Base = N->getOperand(3);
2934 SDValue Modifier = N->getOperand(4);
2935 SDValue Offset = N->getOperand(5);
2936 SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2937
2938 unsigned Opcode;
2939 unsigned IntNo = N->getConstantOperandVal(1);
2940 switch (IntNo) {
2941 default:
2942 llvm_unreachable("Unexpected HVX gather intrinsic.");
2943 case Intrinsic::hexagon_V6_vgathermh:
2944 case Intrinsic::hexagon_V6_vgathermh_128B:
2945 Opcode = Hexagon::V6_vgathermh_pseudo;
2946 break;
2947 case Intrinsic::hexagon_V6_vgathermw:
2948 case Intrinsic::hexagon_V6_vgathermw_128B:
2949 Opcode = Hexagon::V6_vgathermw_pseudo;
2950 break;
2951 case Intrinsic::hexagon_V6_vgathermhw:
2952 case Intrinsic::hexagon_V6_vgathermhw_128B:
2953 Opcode = Hexagon::V6_vgathermhw_pseudo;
2954 break;
2955 case Intrinsic::hexagon_V6_vgather_vscattermh:
2956 case Intrinsic::hexagon_V6_vgather_vscattermh_128B:
2957 Opcode = Hexagon::V6_vgather_vscatter_mh_pseudo;
2958 break;
2959 }
2960
2961 SDVTList VTs = CurDAG->getVTList(MVT::Other);
2962 SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
2963 SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2964
2966 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2967
2968 ReplaceNode(N, Result);
2969}
2970
2972 unsigned IID = N->getConstantOperandVal(0);
2973 SDNode *Result;
2974 switch (IID) {
2975 case Intrinsic::hexagon_V6_vaddcarry: {
2976 std::array<SDValue, 3> Ops = {
2977 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2978 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2979 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2980 break;
2981 }
2982 case Intrinsic::hexagon_V6_vaddcarry_128B: {
2983 std::array<SDValue, 3> Ops = {
2984 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2985 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2986 Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2987 break;
2988 }
2989 case Intrinsic::hexagon_V6_vsubcarry: {
2990 std::array<SDValue, 3> Ops = {
2991 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2992 SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2993 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2994 break;
2995 }
2996 case Intrinsic::hexagon_V6_vsubcarry_128B: {
2997 std::array<SDValue, 3> Ops = {
2998 {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2999 SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
3000 Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
3001 break;
3002 }
3003 default:
3004 llvm_unreachable("Unexpected HVX dual output intrinsic.");
3005 }
3006 ReplaceUses(N, Result);
3007 ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
3008 ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
3009 CurDAG->RemoveDeadNode(N);
3010}
3011
3012// Check if the intrinsic corresponds to IEEE HVX instruction.
3014 return Opcode == Intrinsic::hexagon_V6_vabs_hf_128B ||
3015 Opcode == Intrinsic::hexagon_V6_vabs_sf_128B ||
3016 Opcode == Intrinsic::hexagon_V6_vsub_hf_hf_128B ||
3017 Opcode == Intrinsic::hexagon_V6_vadd_hf_hf_128B ||
3018 Opcode == Intrinsic::hexagon_V6_vadd_sf_hf_128B ||
3019 Opcode == Intrinsic::hexagon_V6_vsub_sf_hf_128B ||
3020 Opcode == Intrinsic::hexagon_V6_vadd_sf_sf_128B ||
3021 Opcode == Intrinsic::hexagon_V6_vsub_sf_sf_128B ||
3022 Opcode == Intrinsic::hexagon_V6_vassign_fp_128B ||
3023 Opcode == Intrinsic::hexagon_V6_vfmin_hf_128B ||
3024 Opcode == Intrinsic::hexagon_V6_vfmin_sf_128B ||
3025 Opcode == Intrinsic::hexagon_V6_vfmax_hf_128B ||
3026 Opcode == Intrinsic::hexagon_V6_vfmax_sf_128B ||
3027 Opcode == Intrinsic::hexagon_V6_vfneg_hf_128B ||
3028 Opcode == Intrinsic::hexagon_V6_vfneg_sf_128B ||
3029 Opcode == Intrinsic::hexagon_V6_vmpy_sf_hf_acc_128B ||
3030 Opcode == Intrinsic::hexagon_V6_vmpy_hf_hf_acc_128B ||
3031 Opcode == Intrinsic::hexagon_V6_vmpy_sf_hf_128B ||
3032 Opcode == Intrinsic::hexagon_V6_vmpy_hf_hf_128B ||
3033 Opcode == Intrinsic::hexagon_V6_vmpy_sf_sf_128B ||
3034 Opcode == Intrinsic::hexagon_V6_vcvt_sf_hf_128B ||
3035 Opcode == Intrinsic::hexagon_V6_vcvt_hf_h_128B;
3036}
3037
3038// Translate IEEE HVX intrinsics to QFloat instructions.
3040 unsigned &Opcode) {
3041 SDLoc DL(N);
3042 MVT ResTy = N->getValueType(0).getSimpleVT();
3043 switch (Opcode) {
3044 // v0.sf = vadd(v0.sf,v1.sf) is translated to
3045 // v2.qf32 = vadd(v0.sf,v1.sf); v0.sf = v2.qf32.
3046 case Intrinsic::hexagon_V6_vadd_sf_sf_128B: {
3047 SDNode *AddNode = CurDAG->getMachineNode(
3048 Hexagon::V6_vadd_sf, DL, ResTy, N->getOperand(1), N->getOperand(2));
3049 SDNode *ConvNode = CurDAG->getMachineNode(Hexagon::V6_vconv_sf_qf32, DL,
3050 ResTy, SDValue(AddNode, 0));
3051 ReplaceUses(SDValue(N, 0), SDValue(ConvNode, 0));
3052 CurDAG->RemoveDeadNode(N);
3053 return;
3054 }
3055 // v0.sf = vsub(v0.sf,v1.sf) is translated to
3056 // v2.qf32 = vsub(v0.sf,v1.sf); v0.sf = v2.qf32.
3057 case Intrinsic::hexagon_V6_vsub_sf_sf_128B: {
3058 SDNode *SubNode = CurDAG->getMachineNode(
3059 Hexagon::V6_vsub_sf, DL, ResTy, N->getOperand(1), N->getOperand(2));
3060 SDNode *ConvNode = CurDAG->getMachineNode(Hexagon::V6_vconv_sf_qf32, DL,
3061 ResTy, SDValue(SubNode, 0));
3062 ReplaceUses(SDValue(N, 0), SDValue(ConvNode, 0));
3063 CurDAG->RemoveDeadNode(N);
3064 return;
3065 }
3066 // v0.hf = vadd(v0.hf,v1.hf) is translated to
3067 // v2.qf16 = vadd(v0.hf,v1.hf); v0.hf = v2.qf16.
3068 case Intrinsic::hexagon_V6_vadd_hf_hf_128B: {
3069 SDNode *AddNode = CurDAG->getMachineNode(
3070 Hexagon::V6_vadd_hf, DL, ResTy, N->getOperand(1), N->getOperand(2));
3071 SDNode *ConvNode = CurDAG->getMachineNode(Hexagon::V6_vconv_hf_qf16, DL,
3072 ResTy, SDValue(AddNode, 0));
3073 ReplaceUses(SDValue(N, 0), SDValue(ConvNode, 0));
3074 CurDAG->RemoveDeadNode(N);
3075 return;
3076 }
3077 // v0.hf = vsub(v0.hf,v1.hf) is translated to
3078 // v2.qf16 = vsub(v0.hf,v1.hf); v0.hf = v2.qf16.
3079 case Intrinsic::hexagon_V6_vsub_hf_hf_128B: {
3080 SDNode *SubNode = CurDAG->getMachineNode(
3081 Hexagon::V6_vsub_hf, DL, ResTy, N->getOperand(1), N->getOperand(2));
3082 SDNode *ConvNode = CurDAG->getMachineNode(Hexagon::V6_vconv_hf_qf16, DL,
3083 ResTy, SDValue(SubNode, 0));
3084 ReplaceUses(SDValue(N, 0), SDValue(ConvNode, 0));
3085 CurDAG->RemoveDeadNode(N);
3086 return;
3087 }
3088 // v1:0.sf = vadd(v0.hf,v1.hf) is translated to
3089 // r2 = #15360; v2.h = vsplat(r2);
3090 // v5:4.qf32 = vmpy(v1.hf,v2.hf); v31:30.qf32 = vmpy(v0.hf,v2.hf)
3091 // v4.qf32 = vadd(v30.qf32,v4.qf32); v3.qf32 = vadd(v31.qf32,v5.qf32)
3092 // v0.sf = v4.qf32; v1.sf = v3.qf32
3093 // Widen the hf operands to sf by doing a widening multiply with 1.0f
3094 // and perform the add.
3095 case Intrinsic::hexagon_V6_vadd_sf_hf_128B: {
3096 SDValue Const = CurDAG->getTargetConstant(0x3C00U, DL, MVT::i32);
3097 SDNode *SplatPseudoNode =
3098 CurDAG->getMachineNode(Hexagon::PS_vsplatih, DL, ResTy, Const);
3099 SDNode *MpyNodeOp1 =
3100 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, ResTy,
3101 N->getOperand(1), SDValue(SplatPseudoNode, 0));
3102 SDNode *MpyNodeOp2 =
3103 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, ResTy,
3104 N->getOperand(2), SDValue(SplatPseudoNode, 0));
3105
3106 SDValue LoRegOp1 = CurDAG->getTargetExtractSubreg(
3107 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(MpyNodeOp1, 0));
3108 SDValue HiRegOp1 = CurDAG->getTargetExtractSubreg(
3109 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(MpyNodeOp1, 0));
3110 SDValue LoRegOp2 = CurDAG->getTargetExtractSubreg(
3111 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(MpyNodeOp2, 0));
3112 SDValue HiRegOp2 = CurDAG->getTargetExtractSubreg(
3113 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(MpyNodeOp2, 0));
3114
3115 SDNode *LoAddNode = CurDAG->getMachineNode(Hexagon::V6_vadd_qf32, DL,
3116 MVT::v32i32, LoRegOp1, LoRegOp2);
3117 SDNode *HiAddNode = CurDAG->getMachineNode(Hexagon::V6_vadd_qf32, DL,
3118 MVT::v32i32, HiRegOp1, HiRegOp2);
3119
3120 SDNode *ConvLoNode = CurDAG->getMachineNode(
3121 Hexagon::V6_vconv_sf_qf32, DL, MVT::v32i32, SDValue(LoAddNode, 0));
3122 SDNode *ConvHiNode = CurDAG->getMachineNode(
3123 Hexagon::V6_vconv_sf_qf32, DL, MVT::v32i32, SDValue(HiAddNode, 0));
3124
3125 SDValue RC =
3126 CurDAG->getTargetConstant(Hexagon::HvxWRRegClassID, DL, MVT::i32);
3127 SDValue SubRegL = CurDAG->getTargetConstant(Hexagon::vsub_lo, DL, MVT::i32);
3128 SDValue SubRegH = CurDAG->getTargetConstant(Hexagon::vsub_hi, DL, MVT::i32);
3129 const SDValue Ops[] = {RC, SDValue(ConvHiNode, 0), SubRegH,
3130 SDValue(ConvLoNode, 0), SubRegL};
3131 SDNode *RS = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
3132 MVT::v64i32, Ops);
3133
3134 ReplaceUses(SDValue(N, 0), SDValue(RS, 0));
3135 CurDAG->RemoveDeadNode(N);
3136 return;
3137 }
3138 // v1:0.sf = vsub(v0.hf,v1.hf) is translated to
3139 // r2 = #15360; v2.h = vsplat(r2);
3140 // v5:4.qf32 = vmpy(v1.hf,v2.hf); v31:30.qf32 = vmpy(v0.hf,v2.hf)
3141 // v4.qf32 = vsub(v30.qf32,v4.qf32); v3.qf32 = vsub(v31.qf32,v5.qf32)
3142 // v0.sf = v4.qf32; v1.sf = v3.qf32
3143 // Widen the hf operands to sf by doing a widening multiply with 1.0f
3144 // and perform the sub.
3145 case Intrinsic::hexagon_V6_vsub_sf_hf_128B: {
3146 SDValue Const = CurDAG->getTargetConstant(0x3C00U, DL, MVT::i32);
3147 SDNode *SplatPseudoNode =
3148 CurDAG->getMachineNode(Hexagon::PS_vsplatih, DL, ResTy, Const);
3149 SDNode *MpyNodeOp1 =
3150 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, ResTy,
3151 N->getOperand(1), SDValue(SplatPseudoNode, 0));
3152 SDNode *MpyNodeOp2 =
3153 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, ResTy,
3154 N->getOperand(2), SDValue(SplatPseudoNode, 0));
3155
3156 SDValue LoRegOp1 = CurDAG->getTargetExtractSubreg(
3157 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(MpyNodeOp1, 0));
3158 SDValue HiRegOp1 = CurDAG->getTargetExtractSubreg(
3159 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(MpyNodeOp1, 0));
3160 SDValue LoRegOp2 = CurDAG->getTargetExtractSubreg(
3161 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(MpyNodeOp2, 0));
3162 SDValue HiRegOp2 = CurDAG->getTargetExtractSubreg(
3163 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(MpyNodeOp2, 0));
3164
3165 SDNode *LoSubNode = CurDAG->getMachineNode(Hexagon::V6_vsub_qf32, DL,
3166 MVT::v32i32, LoRegOp1, LoRegOp2);
3167 SDNode *HiSubNode = CurDAG->getMachineNode(Hexagon::V6_vsub_qf32, DL,
3168 MVT::v32i32, HiRegOp1, HiRegOp2);
3169
3170 SDNode *ConvLoNode = CurDAG->getMachineNode(
3171 Hexagon::V6_vconv_sf_qf32, DL, MVT::v32i32, SDValue(LoSubNode, 0));
3172 SDNode *ConvHiNode = CurDAG->getMachineNode(
3173 Hexagon::V6_vconv_sf_qf32, DL, MVT::v32i32, SDValue(HiSubNode, 0));
3174
3175 SDValue RC =
3176 CurDAG->getTargetConstant(Hexagon::HvxWRRegClassID, DL, MVT::i32);
3177 SDValue SubRegL = CurDAG->getTargetConstant(Hexagon::vsub_lo, DL, MVT::i32);
3178 SDValue SubRegH = CurDAG->getTargetConstant(Hexagon::vsub_hi, DL, MVT::i32);
3179 const SDValue Ops[] = {RC, SDValue(ConvHiNode, 0), SubRegH,
3180 SDValue(ConvLoNode, 0), SubRegL};
3181 SDNode *RS = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
3182 MVT::v64i32, Ops);
3183
3184 ReplaceUses(SDValue(N, 0), SDValue(RS, 0));
3185 CurDAG->RemoveDeadNode(N);
3186 return;
3187 }
3188 // v0.w = vfmv(v1.w) is translated to v0 = v1.
3189 case Intrinsic::hexagon_V6_vassign_fp_128B: {
3190 SDNode *AssignNode = CurDAG->getMachineNode(Hexagon::V6_vassign, DL, ResTy,
3191 N->getOperand(1));
3192 ReplaceUses(SDValue(N, 0), SDValue(AssignNode, 0));
3193 CurDAG->RemoveDeadNode(N);
3194 return;
3195 }
3196 // v0.hf = vmpy(v0.hf,v1.hf) is translated to
3197 // v1:0.qf32 = vmpy(v0.hf,v1.hf); v0.hf = v1:0.qf32.
3198 case Intrinsic::hexagon_V6_vmpy_hf_hf_128B: {
3199 SDNode *MpyNode =
3200 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, MVT::v64i32,
3201 N->getOperand(1), N->getOperand(2));
3202 SDNode *ConvNode = CurDAG->getMachineNode(Hexagon::V6_vconv_hf_qf32, DL,
3203 ResTy, SDValue(MpyNode, 0));
3204 ReplaceUses(SDValue(N, 0), SDValue(ConvNode, 0));
3205 CurDAG->RemoveDeadNode(N);
3206 return;
3207 }
3208 // v0.sf = vmpy(v0.sf,v1.sf) is translated to
3209 // v2.qf32 = vmpy(v0.sf,v1.sf); v0.sf = v2.qf32.
3210 case Intrinsic::hexagon_V6_vmpy_sf_sf_128B: {
3211 SDNode *MpyNode =
3212 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_sf, DL, ResTy,
3213 N->getOperand(1), N->getOperand(2));
3214 SDNode *ConvNode = CurDAG->getMachineNode(Hexagon::V6_vconv_sf_qf32, DL,
3215 ResTy, SDValue(MpyNode, 0));
3216 ReplaceUses(SDValue(N, 0), SDValue(ConvNode, 0));
3217 CurDAG->RemoveDeadNode(N);
3218 return;
3219 }
3220 // v1:0.sf = vmpy(v0.hf,v1.hf) is translated to
3221 // v3:2.qf32 = vmpy(v0.hf,v1.hf); v0.sf = v2.qf32 ; v1.sf = v3.qf32.
3222 case Intrinsic::hexagon_V6_vmpy_sf_hf_128B: {
3223 SDNode *MpyNode =
3224 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, ResTy,
3225 N->getOperand(1), N->getOperand(2));
3226 SDValue LoReg = CurDAG->getTargetExtractSubreg(
3227 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(MpyNode, 0));
3228 SDValue HiReg = CurDAG->getTargetExtractSubreg(
3229 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(MpyNode, 0));
3230 SDNode *ConvLoNode = CurDAG->getMachineNode(Hexagon::V6_vconv_sf_qf32, DL,
3231 MVT::v32i32, LoReg);
3232 SDNode *ConvHiNode = CurDAG->getMachineNode(Hexagon::V6_vconv_sf_qf32, DL,
3233 MVT::v32i32, HiReg);
3234
3235 SDValue RC =
3236 CurDAG->getTargetConstant(Hexagon::HvxWRRegClassID, DL, MVT::i32);
3237 SDValue SubRegL = CurDAG->getTargetConstant(Hexagon::vsub_lo, DL, MVT::i32);
3238 SDValue SubRegH = CurDAG->getTargetConstant(Hexagon::vsub_hi, DL, MVT::i32);
3239 const SDValue Ops[] = {RC, SDValue(ConvHiNode, 0), SubRegH,
3240 SDValue(ConvLoNode, 0), SubRegL};
3241 SDNode *RS = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
3242 MVT::v64i32, Ops);
3243
3244 ReplaceUses(SDValue(N, 0), SDValue(RS, 0));
3245 CurDAG->RemoveDeadNode(N);
3246 return;
3247 }
3248 // v0.hf += vmpy(v1.hf,v2.hf) is translated to
3249 // v7:6.qf32 = vmpy(v1.hf, v2.hf) // widening multiply
3250 // V5:4.qf32 = vmpy(v0.hf,1.0) // Convert accum to qf32
3251 // V4.qf32 = vadd(V6.qf32, V4.qf32) // accumulation
3252 // V5.qf32 = vadd(V7.qf32, V5.qf32) // accumulation
3253 // V4.hf = V5:4.qf32
3254 case Intrinsic::hexagon_V6_vmpy_hf_hf_acc_128B: {
3255 SDNode *MpyNode =
3256 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, MVT::v64i32,
3257 N->getOperand(2), N->getOperand(3));
3258
3259 SDValue Const = CurDAG->getTargetConstant(0x3C00U, DL, MVT::i32);
3260 SDNode *SplatConstNode =
3261 CurDAG->getMachineNode(Hexagon::PS_vsplatih, DL, MVT::v64i32, Const);
3262 SDNode *WidenAcc =
3263 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, MVT::v64i32,
3264 N->getOperand(1), SDValue(SplatConstNode, 0));
3265
3266 SDValue LoMpyNode = CurDAG->getTargetExtractSubreg(
3267 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(MpyNode, 0));
3268 SDValue HiMpyNode = CurDAG->getTargetExtractSubreg(
3269 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(MpyNode, 0));
3270 SDValue LoWidenAcc = CurDAG->getTargetExtractSubreg(
3271 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(WidenAcc, 0));
3272 SDValue HiWidenAcc = CurDAG->getTargetExtractSubreg(
3273 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(WidenAcc, 0));
3274
3275 SDNode *LoAddNode = CurDAG->getMachineNode(
3276 Hexagon::V6_vadd_qf32, DL, MVT::v32i32, LoWidenAcc, LoMpyNode);
3277 SDNode *HiAddNode = CurDAG->getMachineNode(
3278 Hexagon::V6_vadd_qf32, DL, MVT::v32i32, HiWidenAcc, HiMpyNode);
3279
3280 SDValue RC =
3281 CurDAG->getTargetConstant(Hexagon::HvxWRRegClassID, DL, MVT::i32);
3282 SDValue SubRegL = CurDAG->getTargetConstant(Hexagon::vsub_lo, DL, MVT::i32);
3283 SDValue SubRegH = CurDAG->getTargetConstant(Hexagon::vsub_hi, DL, MVT::i32);
3284 const SDValue Ops[] = {RC, SDValue(HiAddNode, 0), SubRegH,
3285 SDValue(LoAddNode, 0), SubRegL};
3286 SDNode *RS = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
3287 MVT::v64i32, Ops);
3288
3289 SDNode *ConvNode = CurDAG->getMachineNode(Hexagon::V6_vconv_hf_qf32, DL,
3290 ResTy, SDValue(RS, 0));
3291 ReplaceUses(SDValue(N, 0), SDValue(ConvNode, 0));
3292 CurDAG->RemoveDeadNode(N);
3293 return;
3294 }
3295 // v1:0.sf += vmpy(v2.hf,v3.hf) is translated to
3296 // v3:2.qf32 = vmpy(v2.hf,v3.hf);
3297 // v0.qf32 = vadd(v2.qf32,v0.sf); v1.qf32 = vadd(v3.qf32,v1.sf);
3298 // v0.sf = v0.qf32; v1.sf = v1.qf32
3299 case Intrinsic::hexagon_V6_vmpy_sf_hf_acc_128B: {
3300 SDNode *MpyNode =
3301 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, ResTy,
3302 N->getOperand(2), N->getOperand(3));
3303
3304 SDValue LoRegMpy = CurDAG->getTargetExtractSubreg(
3305 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(MpyNode, 0));
3306 SDValue HiRegMpy = CurDAG->getTargetExtractSubreg(
3307 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(MpyNode, 0));
3308
3309 SDValue LoRegDest = CurDAG->getTargetExtractSubreg(
3310 Hexagon::vsub_lo, DL, MVT::v32i32, N->getOperand(1));
3311 SDValue HiRegDest = CurDAG->getTargetExtractSubreg(
3312 Hexagon::vsub_hi, DL, MVT::v32i32, N->getOperand(1));
3313
3314 SDNode *LoAddNode = CurDAG->getMachineNode(
3315 Hexagon::V6_vadd_qf32_mix, DL, MVT::v32i32, LoRegMpy, LoRegDest);
3316 SDNode *HiAddNode = CurDAG->getMachineNode(
3317 Hexagon::V6_vadd_qf32_mix, DL, MVT::v32i32, HiRegMpy, HiRegDest);
3318
3319 SDNode *ConvLoNode = CurDAG->getMachineNode(
3320 Hexagon::V6_vconv_sf_qf32, DL, MVT::v32i32, SDValue(LoAddNode, 0));
3321 SDNode *ConvHiNode = CurDAG->getMachineNode(
3322 Hexagon::V6_vconv_sf_qf32, DL, MVT::v32i32, SDValue(HiAddNode, 0));
3323
3324 SDValue RC =
3325 CurDAG->getTargetConstant(Hexagon::HvxWRRegClassID, DL, MVT::i32);
3326 SDValue SubRegL = CurDAG->getTargetConstant(Hexagon::vsub_lo, DL, MVT::i32);
3327 SDValue SubRegH = CurDAG->getTargetConstant(Hexagon::vsub_hi, DL, MVT::i32);
3328 const SDValue Ops[] = {RC, SDValue(ConvHiNode, 0), SubRegH,
3329 SDValue(ConvLoNode, 0), SubRegL};
3330 SDNode *RS = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
3331 MVT::v64i32, Ops);
3332
3333 ReplaceUses(SDValue(N, 0), SDValue(RS, 0));
3334 CurDAG->RemoveDeadNode(N);
3335 return;
3336 }
3337 // v0.hf = vfmin(v0.hf,v1.hf) is translated to v0.hf = vmin(v0.hf,v1.hf).
3338 case Intrinsic::hexagon_V6_vfmin_hf_128B: {
3339 SDNode *MinNode = CurDAG->getMachineNode(
3340 Hexagon::V6_vmin_hf, DL, ResTy, N->getOperand(1), N->getOperand(2));
3341 ReplaceUses(SDValue(N, 0), SDValue(MinNode, 0));
3342 CurDAG->RemoveDeadNode(N);
3343 return;
3344 }
3345 // v0.sf = vfmin(v0.sf,v1.sf) is translated to v0.sf = vmin(v0.sf,v1.sf).
3346 case Intrinsic::hexagon_V6_vfmin_sf_128B: {
3347 SDNode *MinNode = CurDAG->getMachineNode(
3348 Hexagon::V6_vmin_sf, DL, ResTy, N->getOperand(1), N->getOperand(2));
3349 ReplaceUses(SDValue(N, 0), SDValue(MinNode, 0));
3350 CurDAG->RemoveDeadNode(N);
3351 return;
3352 }
3353 // v0.hf = vfmax(v0.hf,v1.hf) is translated to v0.hf = vmax(v0.hf,v1.hf).
3354 case Intrinsic::hexagon_V6_vfmax_hf_128B: {
3355 SDNode *MaxNode = CurDAG->getMachineNode(
3356 Hexagon::V6_vmax_hf, DL, ResTy, N->getOperand(1), N->getOperand(2));
3357 ReplaceUses(SDValue(N, 0), SDValue(MaxNode, 0));
3358 CurDAG->RemoveDeadNode(N);
3359 return;
3360 }
3361 // v0.sf = vfmax(v0.sf,v1.sf) is translated to v0.sf = vmax(v0.sf,v1.sf).
3362 case Intrinsic::hexagon_V6_vfmax_sf_128B: {
3363 SDNode *MaxNode = CurDAG->getMachineNode(
3364 Hexagon::V6_vmax_sf, DL, ResTy, N->getOperand(1), N->getOperand(2));
3365 ReplaceUses(SDValue(N, 0), SDValue(MaxNode, 0));
3366 CurDAG->RemoveDeadNode(N);
3367 return;
3368 }
3369 // v0.hf = vabs(v0.hf) is translated to
3370 // r2 = #32767 ; v1.h = vsplat(r2) ; v0 = vand(v0,v1)
3371 // Reset the sign bit by splatting 0x7FFF and does a vector and.
3372 case Intrinsic::hexagon_V6_vabs_hf_128B: {
3373 SDValue Const = CurDAG->getTargetConstant(0x7FFFU, DL, MVT::i32);
3374 SDNode *SplatPseudoNode =
3375 CurDAG->getMachineNode(Hexagon::PS_vsplatih, DL, ResTy, Const);
3376 SDNode *AndNode =
3377 CurDAG->getMachineNode(Hexagon::V6_vand, DL, ResTy, N->getOperand(1),
3378 SDValue(SplatPseudoNode, 0));
3379 ReplaceUses(SDValue(N, 0), SDValue(AndNode, 0));
3380 CurDAG->RemoveDeadNode(N);
3381 return;
3382 }
3383 // v0.sf = vabs(v0.sf) is translated to
3384 // r2 = ##2147483647 ; v1 = vsplat(r2) ; v0 = vand(v0,v1)
3385 // Reset the sign bit by splatting 0x7FFF FFFF and does a vector and.
3386 case Intrinsic::hexagon_V6_vabs_sf_128B: {
3387 SDValue Const = CurDAG->getTargetConstant(0x7FFFFFFFU, DL, MVT::i32);
3388 SDNode *SplatPseudoNode =
3389 CurDAG->getMachineNode(Hexagon::PS_vsplatiw, DL, ResTy, Const);
3390 SDNode *AndNode =
3391 CurDAG->getMachineNode(Hexagon::V6_vand, DL, ResTy, N->getOperand(1),
3392 SDValue(SplatPseudoNode, 0));
3393 ReplaceUses(SDValue(N, 0), SDValue(AndNode, 0));
3394 CurDAG->RemoveDeadNode(N);
3395 return;
3396 }
3397 // v0.hf = vfneg(v0.hf) is translated to
3398 // r2 = #32768 ; v1.h = vsplat(r2) ; v0 = vxor(v0,v1)
3399 // Flip the sign bit by splatting 0x8000 and does a vector xor.
3400 case Intrinsic::hexagon_V6_vfneg_hf_128B: {
3401 SDValue Const = CurDAG->getTargetConstant(0x8000U, DL, MVT::i32);
3402 SDNode *SplatPseudoNode =
3403 CurDAG->getMachineNode(Hexagon::PS_vsplatih, DL, ResTy, Const);
3404 SDNode *XorNode =
3405 CurDAG->getMachineNode(Hexagon::V6_vxor, DL, ResTy, N->getOperand(1),
3406 SDValue(SplatPseudoNode, 0));
3407 ReplaceUses(SDValue(N, 0), SDValue(XorNode, 0));
3408 CurDAG->RemoveDeadNode(N);
3409 return;
3410 }
3411 // v0.sf = vfneg(v0.sf) is translated to
3412 // r2 = ##-2147483648 ; v1 = vsplat(r2) ; v0 = vxor(v0,v1)
3413 // Flip the sign bit by splatting 0x8000 0000 and does a vector xor.
3414 case Intrinsic::hexagon_V6_vfneg_sf_128B: {
3415 SDValue Const = CurDAG->getTargetConstant(0x80000000U, DL, MVT::i32);
3416 SDNode *SplatPseudoNode =
3417 CurDAG->getMachineNode(Hexagon::PS_vsplatiw, DL, ResTy, Const);
3418 SDNode *XorNode =
3419 CurDAG->getMachineNode(Hexagon::V6_vxor, DL, ResTy, N->getOperand(1),
3420 SDValue(SplatPseudoNode, 0));
3421 ReplaceUses(SDValue(N, 0), SDValue(XorNode, 0));
3422 CurDAG->RemoveDeadNode(N);
3423 return;
3424 }
3425 // v0.h = vcvt(v0.hf) is translated to v0.hf = v0.h.
3426 case Intrinsic::hexagon_V6_vcvt_hf_h_128B: {
3427 SDNode *ConvNode = CurDAG->getMachineNode(Hexagon::V6_vconv_hf_h, DL, ResTy,
3428 N->getOperand(1));
3429 ReplaceUses(SDValue(N, 0), SDValue(ConvNode, 0));
3430 CurDAG->RemoveDeadNode(N);
3431 return;
3432 }
3433 // v1:0.sf = vcvt(v0.hf) is translated to
3434 // r2 = #15360; v1.h = vsplat(r2); v3:2.qf32 = vmpy(v0.hf,v1.hf);
3435 // v0.sf = v2.qf32; v1.sf = v3.qf32
3436 // Do a widening multiply with 1.0f to convert the hf to sf.
3437 case Intrinsic::hexagon_V6_vcvt_sf_hf_128B: {
3438 SDValue Const = CurDAG->getTargetConstant(0x3C00U, DL, MVT::i32);
3439 SDNode *SplatPseudoNode =
3440 CurDAG->getMachineNode(Hexagon::PS_vsplatih, DL, ResTy, Const);
3441 SDNode *MpyNode =
3442 CurDAG->getMachineNode(Hexagon::V6_vmpy_qf32_hf, DL, ResTy,
3443 N->getOperand(1), SDValue(SplatPseudoNode, 0));
3444
3445 SDValue LoReg = CurDAG->getTargetExtractSubreg(
3446 Hexagon::vsub_lo, DL, MVT::v32i32, SDValue(MpyNode, 0));
3447 SDValue HiReg = CurDAG->getTargetExtractSubreg(
3448 Hexagon::vsub_hi, DL, MVT::v32i32, SDValue(MpyNode, 0));
3449 SDNode *ConvLoNode = CurDAG->getMachineNode(Hexagon::V6_vconv_sf_qf32, DL,
3450 MVT::v32i32, LoReg);
3451 SDNode *ConvHiNode = CurDAG->getMachineNode(Hexagon::V6_vconv_sf_qf32, DL,
3452 MVT::v32i32, HiReg);
3453
3454 SDValue RC =
3455 CurDAG->getTargetConstant(Hexagon::HvxWRRegClassID, DL, MVT::i32);
3456 SDValue SubRegL = CurDAG->getTargetConstant(Hexagon::vsub_lo, DL, MVT::i32);
3457 SDValue SubRegH = CurDAG->getTargetConstant(Hexagon::vsub_hi, DL, MVT::i32);
3458 const SDValue Ops[] = {RC, SDValue(ConvHiNode, 0), SubRegH,
3459 SDValue(ConvLoNode, 0), SubRegL};
3460 SDNode *RS = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL,
3461 MVT::v64i32, Ops);
3462
3463 ReplaceUses(SDValue(N, 0), SDValue(RS, 0));
3464 CurDAG->RemoveDeadNode(N);
3465 return;
3466 }
3467
3468 default:
3469 llvm_unreachable("Unexpected HVX IEEE intrinsic: no QFloat translation");
3470 return;
3471 }
3472
3473 return;
3474}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
static msgpack::DocNode getNode(msgpack::DocNode DN, msgpack::Type Type, MCValue Val)
constexpr LLT S1
ReachingDefInfo InstSet InstSet & Ignore
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg, SDValue Val={})
#define X(NUM, ENUM, NAME)
Definition ELF.h:853
This file implements the BitVector class.
static constexpr unsigned long long mask(BlockVerifier::State S)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition Compiler.h:663
const HexagonInstrInfo * TII
static bool isIdentity(ArrayRef< int > Mask)
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
static bool isLowHalfOnly(ArrayRef< int > Mask)
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
#define H(x, y, z)
Definition MD5.cpp:56
print mir2vec MIR2Vec Vocabulary Printer Pass
Definition MIR2Vec.cpp:598
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
static bool isUndef(const MachineInstr &MI)
#define T
nvptx lower args
static constexpr unsigned SMA(unsigned Version)
static constexpr unsigned SM(unsigned Version)
#define P(N)
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
static Type * getValueType(Value *V, bool LookThroughCmp=false)
Returns the "element type" of the given value/instruction V.
BaseType
A given derived pointer can have multiple base pointers through phi/selects.
static LLVM_ATTRIBUTE_ALWAYS_INLINE MVT::SimpleValueType getSimpleVT(const uint8_t *MatcherTable, size_t &MatcherIndex)
getSimpleVT - Decode a value in MatcherTable, if it's a VBR encoded value, use GetVBR to decode it.
This file implements a set that has insertion order iteration characteristics.
Stack Slot Coloring
#define DEBUG_WITH_TYPE(TYPE,...)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition Debug.h:72
xray Insert XRay ops
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition ArrayRef.h:218
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:194
size_t size() const
Get the array size.
Definition ArrayRef.h:141
ArrayRef< T > take_back(size_t N=1) const
Return a copy of *this with only the last N elements.
Definition ArrayRef.h:225
const T * data() const
Definition ArrayRef.h:138
BitVector & set()
Set all bits in the bitvector.
Definition BitVector.h:366
iterator_range< const_set_bits_iterator > set_bits() const
Definition BitVector.h:159
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:225
iterator end()
Definition DenseMap.h:143
Tagged union holding either a T or a Error.
Definition Error.h:485
bool empty() const
Definition Function.h:859
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
size_t size() const
Definition Function.h:858
void translateIEEEIntrinsicToQFloat(SDNode *N, unsigned &Opcode)
Machine Value Type.
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
static MVT getVectorVT(MVT VT, unsigned NumElements)
MVT getVectorElementType() const
A description of a memory reference used in the backend.
Represent a mutable reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:294
iterator begin() const
Definition ArrayRef.h:338
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
bool insert(SUnit *SU)
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
LLVM_ABI void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
LLVM_ABI void dumpr() const
Dump (recursively) this node and its use-def subgraph.
const SDValue & getOperand(unsigned Num) const
LLVM_ABI void print(raw_ostream &OS, const SelectionDAG *G=nullptr) const
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
unsigned getOpcode() const
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
size_type size() const
Determine the number of elements in the SetVector.
Definition SetVector.h:103
void insert_range(Range &&R)
Definition SetVector.h:176
size_type count(const_arg_type key) const
Count the number of elements of a given key in the SetVector.
Definition SetVector.h:262
Vector takeVector()
Clear the SetVector and return the underlying vector.
Definition SetVector.h:94
bool empty() const
Determine if the SetVector is empty or not.
Definition SetVector.h:100
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition SetVector.h:151
int getMaskElt(unsigned Idx) const
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void resize(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition ISDOpcodes.h:649
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition ISDOpcodes.h:614
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
Definition CoroShape.h:31
DXILDebugInfoMap run(Module &M)
constexpr double e
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
bool empty() const
Definition BasicBlock.h:101
iterator end() const
Definition BasicBlock.h:89
LLVM_ABI iterator begin() const
This is an optimization pass for GlobalISel generic memory operations.
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
@ Offset
Definition DWP.cpp:558
@ Length
Definition DWP.cpp:558
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1738
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
@ Undef
Value of the register doesn't matter.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
@ Done
Definition Threading.h:60
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
constexpr NextUseDistance min(NextUseDistance A, NextUseDistance B)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2207
constexpr int popcount(T Value) noexcept
Count the number of set bits in a value.
Definition bit.h:156
void erase(Container &C, ValueType V)
Wrapper function to remove a value from a container:
Definition STLExtras.h:2199
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:331
auto reverse(ContainerTy &&C)
Definition STLExtras.h:407
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1635
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition Format.h:129
MutableArrayRef(T &OneElt) -> MutableArrayRef< T >
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
void replace(R &&Range, const T &OldValue, const T &NewValue)
Provide wrappers to std::replace which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1909
@ Sub
Subtraction of integers.
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
OutputIt copy(R &&Range, OutputIt Out)
Definition STLExtras.h:1884
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
auto count_if(R &&Range, UnaryPredicate P)
Wrapper function around std::count_if to count the number of times an element satisfying a given pred...
Definition STLExtras.h:2018
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1946
MaskT vshuffvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
MaskT vpack(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > hi(ArrayRef< int > Vuu)
auto mask(ShuffFunc S, unsigned Length, OptArgs... args) -> MaskT
MaskT vshuff(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
MaskT vdealb4w(ArrayRef< int > Vu, ArrayRef< int > Vv)
SmallVector< int, 128 > MaskT
MaskT vdeal(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Size, bool TakeOdd)
ArrayRef< int > lo(ArrayRef< int > Vuu)
MaskT vdealvdd(ArrayRef< int > Vu, ArrayRef< int > Vv, unsigned Rt)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:339
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:351
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:359
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
MVT getSingleVT(MVT ElemTy) const
static SmallVector< uint32_t, 8 > completeToPerfect(ArrayRef< uint32_t > Completions, unsigned Width)
HexagonDAGToDAGISel & ISel
const HexagonTargetLowering & Lower
void selectExtractSubvector(SDNode *N)
static SmallVector< uint32_t, 8 > getPerfectCompletions(ShuffleMask SM, unsigned Width)
MVT getPairVT(MVT ElemTy) const
const HexagonSubtarget & HST
static std::optional< int > rotationDistance(ShuffleMask SM, unsigned WrapAt)
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
std::size_t operator()(SDValue V) const