LLVM  16.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Hexagon.h"
10 #include "HexagonISelDAGToDAG.h"
11 #include "HexagonISelLowering.h"
12 #include "HexagonTargetMachine.h"
13 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/Intrinsics.h"
17 #include "llvm/IR/IntrinsicsHexagon.h"
19 #include "llvm/Support/Debug.h"
20 
21 #include <deque>
22 #include <map>
23 #include <set>
24 #include <utility>
25 #include <vector>
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 
29 using namespace llvm;
30 
31 namespace {
32 
33 // --------------------------------------------------------------------
34 // Implementation of permutation networks.
35 
36 // Implementation of the node routing through butterfly networks:
37 // - Forward delta.
38 // - Reverse delta.
39 // - Benes.
40 //
41 //
42 // Forward delta network consists of log(N) steps, where N is the number
43 // of inputs. In each step, an input can stay in place, or it can get
44 // routed to another position[1]. The step after that consists of two
45 // networks, each half in size in terms of the number of nodes. In those
46 // terms, in the given step, an input can go to either the upper or the
47 // lower network in the next step.
48 //
49 // [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
50 // positions as long as there is no conflict.
51 
52 // Here's a delta network for 8 inputs, only the switching routes are
53 // shown:
54 //
55 // Steps:
56 // |- 1 ---------------|- 2 -----|- 3 -|
57 //
58 // Inp[0] *** *** *** *** Out[0]
59 // \ / \ / \ /
60 // \ / \ / X
61 // \ / \ / / \
62 // Inp[1] *** \ / *** X *** *** Out[1]
63 // \ \ / / \ / \ /
64 // \ \ / / X X
65 // \ \ / / / \ / \
66 // Inp[2] *** \ \ / / *** X *** *** Out[2]
67 // \ \ X / / / \ \ /
68 // \ \ / \ / / / \ X
69 // \ X X / / \ / \
70 // Inp[3] *** \ / \ / \ / *** *** *** Out[3]
71 // \ X X X /
72 // \ / \ / \ / \ /
73 // X X X X
74 // / \ / \ / \ / \
75 // / X X X \
76 // Inp[4] *** / \ / \ / \ *** *** *** Out[4]
77 // / X X \ \ / \ /
78 // / / \ / \ \ \ / X
79 // / / X \ \ \ / / \
80 // Inp[5] *** / / \ \ *** X *** *** Out[5]
81 // / / \ \ \ / \ /
82 // / / \ \ X X
83 // / / \ \ / \ / \
84 // Inp[6] *** / \ *** X *** *** Out[6]
85 // / \ / \ \ /
86 // / \ / \ X
87 // / \ / \ / \
88 // Inp[7] *** *** *** *** Out[7]
89 //
90 //
91 // Reverse delta network is same as delta network, with the steps in
92 // the opposite order.
93 //
94 //
95 // Benes network is a forward delta network immediately followed by
96 // a reverse delta network.
97 
98 enum class ColorKind { None, Red, Black };
99 
100 // Graph coloring utility used to partition nodes into two groups:
101 // they will correspond to nodes routed to the upper and lower networks.
102 struct Coloring {
103  using Node = int;
104  using MapType = std::map<Node, ColorKind>;
105  static constexpr Node Ignore = Node(-1);
106 
107  Coloring(ArrayRef<Node> Ord) : Order(Ord) {
108  build();
109  if (!color())
110  Colors.clear();
111  }
112 
113  const MapType &colors() const {
114  return Colors;
115  }
116 
117  ColorKind other(ColorKind Color) {
118  if (Color == ColorKind::None)
119  return ColorKind::Red;
120  return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
121  }
122 
123  LLVM_DUMP_METHOD void dump() const;
124 
125 private:
126  ArrayRef<Node> Order;
127  MapType Colors;
128  std::set<Node> Needed;
129 
130  using NodeSet = std::set<Node>;
131  std::map<Node,NodeSet> Edges;
132 
133  Node conj(Node Pos) {
134  Node Num = Order.size();
135  return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
136  }
137 
138  ColorKind getColor(Node N) {
139  auto F = Colors.find(N);
140  return F != Colors.end() ? F->second : ColorKind::None;
141  }
142 
143  std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
144 
145  void build();
146  bool color();
147 };
148 } // namespace
149 
150 std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
151  auto Color = ColorKind::None;
152  for (Node N : Nodes) {
153  ColorKind ColorN = getColor(N);
154  if (ColorN == ColorKind::None)
155  continue;
156  if (Color == ColorKind::None)
157  Color = ColorN;
158  else if (Color != ColorKind::None && Color != ColorN)
159  return { false, ColorKind::None };
160  }
161  return { true, Color };
162 }
163 
164 void Coloring::build() {
165  // Add Order[P] and Order[conj(P)] to Edges.
166  for (unsigned P = 0; P != Order.size(); ++P) {
167  Node I = Order[P];
168  if (I != Ignore) {
169  Needed.insert(I);
170  Node PC = Order[conj(P)];
171  if (PC != Ignore && PC != I)
172  Edges[I].insert(PC);
173  }
174  }
175  // Add I and conj(I) to Edges.
176  for (unsigned I = 0; I != Order.size(); ++I) {
177  if (!Needed.count(I))
178  continue;
179  Node C = conj(I);
180  // This will create an entry in the edge table, even if I is not
181  // connected to any other node. This is necessary, because it still
182  // needs to be colored.
183  NodeSet &Is = Edges[I];
184  if (Needed.count(C))
185  Is.insert(C);
186  }
187 }
188 
189 bool Coloring::color() {
190  SetVector<Node> FirstQ;
191  auto Enqueue = [this,&FirstQ] (Node N) {
192  SetVector<Node> Q;
193  Q.insert(N);
194  for (unsigned I = 0; I != Q.size(); ++I) {
195  NodeSet &Ns = Edges[Q[I]];
196  Q.insert(Ns.begin(), Ns.end());
197  }
198  FirstQ.insert(Q.begin(), Q.end());
199  };
200  for (Node N : Needed)
201  Enqueue(N);
202 
203  for (Node N : FirstQ) {
204  if (Colors.count(N))
205  continue;
206  NodeSet &Ns = Edges[N];
207  auto P = getUniqueColor(Ns);
208  if (!P.first)
209  return false;
210  Colors[N] = other(P.second);
211  }
212 
213  // First, color nodes that don't have any dups.
214  for (auto E : Edges) {
215  Node N = E.first;
216  if (!Needed.count(conj(N)) || Colors.count(N))
217  continue;
218  auto P = getUniqueColor(E.second);
219  if (!P.first)
220  return false;
221  Colors[N] = other(P.second);
222  }
223 
224  // Now, nodes that are still uncolored. Since the graph can be modified
225  // in this step, create a work queue.
226  std::vector<Node> WorkQ;
227  for (auto E : Edges) {
228  Node N = E.first;
229  if (!Colors.count(N))
230  WorkQ.push_back(N);
231  }
232 
233  for (Node N : WorkQ) {
234  NodeSet &Ns = Edges[N];
235  auto P = getUniqueColor(Ns);
236  if (P.first) {
237  Colors[N] = other(P.second);
238  continue;
239  }
240 
241  // Coloring failed. Split this node.
242  Node C = conj(N);
243  ColorKind ColorN = other(ColorKind::None);
244  ColorKind ColorC = other(ColorN);
245  NodeSet &Cs = Edges[C];
246  NodeSet CopyNs = Ns;
247  for (Node M : CopyNs) {
248  ColorKind ColorM = getColor(M);
249  if (ColorM == ColorC) {
250  // Connect M with C, disconnect M from N.
251  Cs.insert(M);
252  Edges[M].insert(C);
253  Ns.erase(M);
254  Edges[M].erase(N);
255  }
256  }
257  Colors[N] = ColorN;
258  Colors[C] = ColorC;
259  }
260 
261  // Explicitly assign "None" to all uncolored nodes.
262  for (unsigned I = 0; I != Order.size(); ++I)
263  if (Colors.count(I) == 0)
264  Colors[I] = ColorKind::None;
265 
266  return true;
267 }
268 
269 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
270 void Coloring::dump() const {
271  dbgs() << "{ Order: {";
272  for (Node P : Order) {
273  if (P != Ignore)
274  dbgs() << ' ' << P;
275  else
276  dbgs() << " -";
277  }
278  dbgs() << " }\n";
279  dbgs() << " Needed: {";
280  for (Node N : Needed)
281  dbgs() << ' ' << N;
282  dbgs() << " }\n";
283 
284  dbgs() << " Edges: {\n";
285  for (auto E : Edges) {
286  dbgs() << " " << E.first << " -> {";
287  for (auto N : E.second)
288  dbgs() << ' ' << N;
289  dbgs() << " }\n";
290  }
291  dbgs() << " }\n";
292 
293  auto ColorKindToName = [](ColorKind C) {
294  switch (C) {
295  case ColorKind::None:
296  return "None";
297  case ColorKind::Red:
298  return "Red";
299  case ColorKind::Black:
300  return "Black";
301  }
302  llvm_unreachable("all ColorKinds should be handled by the switch above");
303  };
304 
305  dbgs() << " Colors: {\n";
306  for (auto C : Colors)
307  dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
308  dbgs() << " }\n}\n";
309 }
310 #endif
311 
312 namespace {
313 // Base class of for reordering networks. They don't strictly need to be
314 // permutations, as outputs with repeated occurrences of an input element
315 // are allowed.
316 struct PermNetwork {
317  using Controls = std::vector<uint8_t>;
318  using ElemType = int;
319  static constexpr ElemType Ignore = ElemType(-1);
320 
321  enum : uint8_t {
322  None,
323  Pass,
324  Switch
325  };
326  enum : uint8_t {
327  Forward,
328  Reverse
329  };
330 
331  PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
332  Order.assign(Ord.data(), Ord.data()+Ord.size());
333  Log = 0;
334 
335  unsigned S = Order.size();
336  while (S >>= 1)
337  ++Log;
338 
339  Table.resize(Order.size());
340  for (RowType &Row : Table)
341  Row.resize(Mult*Log, None);
342  }
343 
344  void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
345  unsigned Size = Order.size();
346  V.resize(Size);
347  for (unsigned I = 0; I != Size; ++I) {
348  unsigned W = 0;
349  for (unsigned L = 0; L != Log; ++L) {
350  unsigned C = ctl(I, StartAt+L) == Switch;
351  if (Dir == Forward)
352  W |= C << (Log-1-L);
353  else
354  W |= C << L;
355  }
356  assert(isUInt<8>(W));
357  V[I] = uint8_t(W);
358  }
359  }
360 
361  uint8_t ctl(ElemType Pos, unsigned Step) const {
362  return Table[Pos][Step];
363  }
364  unsigned size() const {
365  return Order.size();
366  }
367  unsigned steps() const {
368  return Log;
369  }
370 
371 protected:
372  unsigned Log;
373  std::vector<ElemType> Order;
374  using RowType = std::vector<uint8_t>;
375  std::vector<RowType> Table;
376 };
377 
378 struct ForwardDeltaNetwork : public PermNetwork {
379  ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
380 
381  bool run(Controls &V) {
382  if (!route(Order.data(), Table.data(), size(), 0))
383  return false;
384  getControls(V, 0, Forward);
385  return true;
386  }
387 
388 private:
389  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
390 };
391 
392 struct ReverseDeltaNetwork : public PermNetwork {
393  ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
394 
395  bool run(Controls &V) {
396  if (!route(Order.data(), Table.data(), size(), 0))
397  return false;
398  getControls(V, 0, Reverse);
399  return true;
400  }
401 
402 private:
403  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
404 };
405 
406 struct BenesNetwork : public PermNetwork {
407  BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
408 
409  bool run(Controls &F, Controls &R) {
410  if (!route(Order.data(), Table.data(), size(), 0))
411  return false;
412 
413  getControls(F, 0, Forward);
414  getControls(R, Log, Reverse);
415  return true;
416  }
417 
418 private:
419  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
420 };
421 } // namespace
422 
423 bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
424  unsigned Step) {
425  bool UseUp = false, UseDown = false;
426  ElemType Num = Size;
427 
428  // Cannot use coloring here, because coloring is used to determine
429  // the "big" switch, i.e. the one that changes halves, and in a forward
430  // network, a color can be simultaneously routed to both halves in the
431  // step we're working on.
432  for (ElemType J = 0; J != Num; ++J) {
433  ElemType I = P[J];
434  // I is the position in the input,
435  // J is the position in the output.
436  if (I == Ignore)
437  continue;
438  uint8_t S;
439  if (I < Num/2)
440  S = (J < Num/2) ? Pass : Switch;
441  else
442  S = (J < Num/2) ? Switch : Pass;
443 
444  // U is the element in the table that needs to be updated.
445  ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
446  if (U < Num/2)
447  UseUp = true;
448  else
449  UseDown = true;
450  if (T[U][Step] != S && T[U][Step] != None)
451  return false;
452  T[U][Step] = S;
453  }
454 
455  for (ElemType J = 0; J != Num; ++J)
456  if (P[J] != Ignore && P[J] >= Num/2)
457  P[J] -= Num/2;
458 
459  if (Step+1 < Log) {
460  if (UseUp && !route(P, T, Size/2, Step+1))
461  return false;
462  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
463  return false;
464  }
465  return true;
466 }
467 
468 bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
469  unsigned Step) {
470  unsigned Pets = Log-1 - Step;
471  bool UseUp = false, UseDown = false;
472  ElemType Num = Size;
473 
474  // In this step half-switching occurs, so coloring can be used.
475  Coloring G({P,Size});
476  const Coloring::MapType &M = G.colors();
477  if (M.empty())
478  return false;
479 
480  ColorKind ColorUp = ColorKind::None;
481  for (ElemType J = 0; J != Num; ++J) {
482  ElemType I = P[J];
483  // I is the position in the input,
484  // J is the position in the output.
485  if (I == Ignore)
486  continue;
487  ColorKind C = M.at(I);
488  if (C == ColorKind::None)
489  continue;
490  // During "Step", inputs cannot switch halves, so if the "up" color
491  // is still unknown, make sure that it is selected in such a way that
492  // "I" will stay in the same half.
493  bool InpUp = I < Num/2;
494  if (ColorUp == ColorKind::None)
495  ColorUp = InpUp ? C : G.other(C);
496  if ((C == ColorUp) != InpUp) {
497  // If I should go to a different half than where is it now, give up.
498  return false;
499  }
500 
501  uint8_t S;
502  if (InpUp) {
503  S = (J < Num/2) ? Pass : Switch;
504  UseUp = true;
505  } else {
506  S = (J < Num/2) ? Switch : Pass;
507  UseDown = true;
508  }
509  T[J][Pets] = S;
510  }
511 
512  // Reorder the working permutation according to the computed switch table
513  // for the last step (i.e. Pets).
514  for (ElemType J = 0, E = Size / 2; J != E; ++J) {
515  ElemType PJ = P[J]; // Current values of P[J]
516  ElemType PC = P[J+Size/2]; // and P[conj(J)]
517  ElemType QJ = PJ; // New values of P[J]
518  ElemType QC = PC; // and P[conj(J)]
519  if (T[J][Pets] == Switch)
520  QC = PJ;
521  if (T[J+Size/2][Pets] == Switch)
522  QJ = PC;
523  P[J] = QJ;
524  P[J+Size/2] = QC;
525  }
526 
527  for (ElemType J = 0; J != Num; ++J)
528  if (P[J] != Ignore && P[J] >= Num/2)
529  P[J] -= Num/2;
530 
531  if (Step+1 < Log) {
532  if (UseUp && !route(P, T, Size/2, Step+1))
533  return false;
534  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
535  return false;
536  }
537  return true;
538 }
539 
540 bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
541  unsigned Step) {
542  Coloring G({P,Size});
543  const Coloring::MapType &M = G.colors();
544  if (M.empty())
545  return false;
546  ElemType Num = Size;
547 
548  unsigned Pets = 2*Log-1 - Step;
549  bool UseUp = false, UseDown = false;
550 
551  // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
552  // result in different controls. Let's pick the one where the first
553  // control will be "Pass".
554  ColorKind ColorUp = ColorKind::None;
555  for (ElemType J = 0; J != Num; ++J) {
556  ElemType I = P[J];
557  if (I == Ignore)
558  continue;
559  ColorKind C = M.at(I);
560  if (C == ColorKind::None)
561  continue;
562  if (ColorUp == ColorKind::None) {
563  ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
564  }
565  unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
566  if (C == ColorUp) {
567  if (I < Num/2)
568  T[I][Step] = Pass;
569  else
570  T[CI][Step] = Switch;
571  T[J][Pets] = (J < Num/2) ? Pass : Switch;
572  UseUp = true;
573  } else { // Down
574  if (I < Num/2)
575  T[CI][Step] = Switch;
576  else
577  T[I][Step] = Pass;
578  T[J][Pets] = (J < Num/2) ? Switch : Pass;
579  UseDown = true;
580  }
581  }
582 
583  // Reorder the working permutation according to the computed switch table
584  // for the last step (i.e. Pets).
585  for (ElemType J = 0; J != Num/2; ++J) {
586  ElemType PJ = P[J]; // Current values of P[J]
587  ElemType PC = P[J+Num/2]; // and P[conj(J)]
588  ElemType QJ = PJ; // New values of P[J]
589  ElemType QC = PC; // and P[conj(J)]
590  if (T[J][Pets] == Switch)
591  QC = PJ;
592  if (T[J+Num/2][Pets] == Switch)
593  QJ = PC;
594  P[J] = QJ;
595  P[J+Num/2] = QC;
596  }
597 
598  for (ElemType J = 0; J != Num; ++J)
599  if (P[J] != Ignore && P[J] >= Num/2)
600  P[J] -= Num/2;
601 
602  if (Step+1 < Log) {
603  if (UseUp && !route(P, T, Size/2, Step+1))
604  return false;
605  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
606  return false;
607  }
608  return true;
609 }
610 
611 // --------------------------------------------------------------------
612 // Support for building selection results (output instructions that are
613 // parts of the final selection).
614 
615 namespace {
616 struct OpRef {
617  OpRef(SDValue V) : OpV(V) {}
618  bool isValue() const { return OpV.getNode() != nullptr; }
619  bool isValid() const { return isValue() || !(OpN & Invalid); }
620  static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
621  static OpRef fail() { return OpRef(Invalid); }
622 
623  static OpRef lo(const OpRef &R) {
624  assert(!R.isValue());
625  return OpRef(R.OpN & (Undef | Index | LoHalf));
626  }
627  static OpRef hi(const OpRef &R) {
628  assert(!R.isValue());
629  return OpRef(R.OpN & (Undef | Index | HiHalf));
630  }
631  static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
632 
633  // Direct value.
634  SDValue OpV = SDValue();
635 
636  // Reference to the operand of the input node:
637  // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
638  // operand index:
639  // If bit 30 is set, it's the high half of the operand.
640  // If bit 29 is set, it's the low half of the operand.
641  unsigned OpN = 0;
642 
643  enum : unsigned {
644  Invalid = 0x10000000,
645  LoHalf = 0x20000000,
646  HiHalf = 0x40000000,
647  Whole = LoHalf | HiHalf,
648  Undef = 0x80000000,
649  Index = 0x0FFFFFFF, // Mask of the index value.
650  IndexBits = 28,
651  };
652 
654  void print(raw_ostream &OS, const SelectionDAG &G) const;
655 
656 private:
657  OpRef(unsigned N) : OpN(N) {}
658 };
659 
660 struct NodeTemplate {
661  NodeTemplate() = default;
662  unsigned Opc = 0;
663  MVT Ty = MVT::Other;
664  std::vector<OpRef> Ops;
665 
666  LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
667 };
668 
669 struct ResultStack {
670  ResultStack(SDNode *Inp)
671  : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
672  SDNode *InpNode;
673  MVT InpTy;
674  unsigned push(const NodeTemplate &Res) {
675  List.push_back(Res);
676  return List.size()-1;
677  }
678  unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
679  NodeTemplate Res;
680  Res.Opc = Opc;
681  Res.Ty = Ty;
682  Res.Ops = Ops;
683  return push(Res);
684  }
685  bool empty() const { return List.empty(); }
686  unsigned size() const { return List.size(); }
687  unsigned top() const { return size()-1; }
688  const NodeTemplate &operator[](unsigned I) const { return List[I]; }
689  unsigned reset(unsigned NewTop) {
690  List.resize(NewTop+1);
691  return NewTop;
692  }
693 
694  using BaseType = std::vector<NodeTemplate>;
695  BaseType::iterator begin() { return List.begin(); }
696  BaseType::iterator end() { return List.end(); }
697  BaseType::const_iterator begin() const { return List.begin(); }
698  BaseType::const_iterator end() const { return List.end(); }
699 
700  BaseType List;
701 
703  void print(raw_ostream &OS, const SelectionDAG &G) const;
704 };
705 } // namespace
706 
707 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
708 void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
709  if (isValue()) {
710  OpV.getNode()->print(OS, &G);
711  return;
712  }
713  if (OpN & Invalid) {
714  OS << "invalid";
715  return;
716  }
717  if (OpN & Undef) {
718  OS << "undef";
719  return;
720  }
721  if ((OpN & Whole) != Whole) {
722  assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
723  if (OpN & LoHalf)
724  OS << "lo ";
725  else
726  OS << "hi ";
727  }
728  OS << '#' << SignExtend32(OpN & Index, IndexBits);
729 }
730 
731 void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
732  const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
733  OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
734  << TII.getName(Opc);
735  bool Comma = false;
736  for (const auto &R : Ops) {
737  if (Comma)
738  OS << ',';
739  Comma = true;
740  OS << ' ';
741  R.print(OS, G);
742  }
743 }
744 
745 void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
746  OS << "Input node:\n";
747 #ifndef NDEBUG
748  InpNode->dumpr(&G);
749 #endif
750  OS << "Result templates:\n";
751  for (unsigned I = 0, E = List.size(); I != E; ++I) {
752  OS << '[' << I << "] ";
753  List[I].print(OS, G);
754  OS << '\n';
755  }
756 }
757 #endif
758 
759 namespace {
760 struct ShuffleMask {
761  ShuffleMask(ArrayRef<int> M) : Mask(M) {
762  for (int M : Mask) {
763  if (M == -1)
764  continue;
765  MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
766  MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
767  }
768  }
769 
771  int MinSrc = -1, MaxSrc = -1;
772 
773  ShuffleMask lo() const {
774  size_t H = Mask.size()/2;
775  return ShuffleMask(Mask.take_front(H));
776  }
777  ShuffleMask hi() const {
778  size_t H = Mask.size()/2;
779  return ShuffleMask(Mask.take_back(H));
780  }
781 
782  void print(raw_ostream &OS) const {
783  OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
784  for (int M : Mask)
785  OS << ' ' << M;
786  OS << " }";
787  }
788 };
789 
791 raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
792  SM.print(OS);
793  return OS;
794 }
795 } // namespace
796 
797 // --------------------------------------------------------------------
798 // The HvxSelector class.
799 
801  return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
802 }
804  return G.getSubtarget<HexagonSubtarget>();
805 }
806 
807 namespace llvm {
808  struct HvxSelector {
813  const unsigned HwLen;
814 
817  HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
818 
819  MVT getSingleVT(MVT ElemTy) const {
820  assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
821  unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8);
822  return MVT::getVectorVT(ElemTy, NumElems);
823  }
824 
825  MVT getPairVT(MVT ElemTy) const {
826  assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
827  unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8);
828  return MVT::getVectorVT(ElemTy, NumElems);
829  }
830 
831  MVT getBoolVT() const {
832  // Return HwLen x i1.
833  return MVT::getVectorVT(MVT::i1, HwLen);
834  }
835 
837  void selectShuffle(SDNode *N);
838  void selectRor(SDNode *N);
839  void selectVAlign(SDNode *N);
840 
841  private:
842  void select(SDNode *ISelN);
843  void materialize(const ResultStack &Results);
844 
845  SDValue getConst32(int Val, const SDLoc &dl);
846  SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
847 
848  enum : unsigned {
849  None,
850  PackMux,
851  };
852  OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
853  OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
854  MutableArrayRef<int> NewMask, unsigned Options = None);
855  OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
856  MutableArrayRef<int> NewMask);
857  OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
858  ResultStack &Results);
859  OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
860  ResultStack &Results);
861 
862  OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
863  OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
864  OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
865  OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
866 
867  OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
868  OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
869  OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
870  OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
871 
872  bool selectVectorConstants(SDNode *N);
873  bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
874  SDValue Va, SDValue Vb, SDNode *N);
875 
876  };
877 }
878 
880  MutableArrayRef<int> MaskR) {
881  unsigned VecLen = Mask.size();
882  assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
883  for (unsigned I = 0; I != VecLen; ++I) {
884  int M = Mask[I];
885  if (M < 0) {
886  MaskL[I] = MaskR[I] = -1;
887  } else if (unsigned(M) < VecLen) {
888  MaskL[I] = M;
889  MaskR[I] = -1;
890  } else {
891  MaskL[I] = -1;
892  MaskR[I] = M-VecLen;
893  }
894  }
895 }
896 
897 static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
898  unsigned MaxLen) {
899  assert(A.size() > 0 && A.size() >= MaxLen);
900  int F = A[0];
901  int E = F;
902  for (unsigned I = 1; I != MaxLen; ++I) {
903  if (A[I] - E != Inc)
904  return { F, I };
905  E = A[I];
906  }
907  return { F, MaxLen };
908 }
909 
910 static bool isUndef(ArrayRef<int> Mask) {
911  for (int Idx : Mask)
912  if (Idx != -1)
913  return false;
914  return true;
915 }
916 
918  for (int I = 0, E = Mask.size(); I != E; ++I) {
919  int M = Mask[I];
920  if (M >= 0 && M != I)
921  return false;
922  }
923  return true;
924 }
925 
927  unsigned SegLen) {
928  assert(isPowerOf2_32(SegLen));
929  SmallVector<unsigned, 4> SegList;
930  if (SM.MaxSrc == -1)
931  return SegList;
932 
933  unsigned Shift = Log2_32(SegLen);
934  BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
935 
936  for (int M : SM.Mask) {
937  if (M >= 0)
938  Segs.set(M >> Shift);
939  }
940 
941  for (unsigned B : Segs.set_bits())
942  SegList.push_back(B);
943  return SegList;
944 }
945 
947  unsigned SegLen) {
948  // Calculate the layout of the output segments in terms of the input
949  // segments.
950  // For example [1,3,1,0] means that the output consists of 4 output
951  // segments, where the first output segment has only elements of the
952  // input segment at index 1. The next output segment only has elements
953  // of the input segment 3, etc.
954  // If an output segment only has undef elements, the value will be ~0u.
955  // If an output segment has elements from more than one input segment,
956  // the corresponding value will be ~1u.
957  unsigned MaskLen = SM.Mask.size();
958  assert(MaskLen % SegLen == 0);
959  SmallVector<unsigned, 4> Map(MaskLen / SegLen);
960 
961  for (int S = 0, E = Map.size(); S != E; ++S) {
962  unsigned Idx = ~0u;
963  for (int I = 0; I != static_cast<int>(SegLen); ++I) {
964  int M = SM.Mask[S*SegLen + I];
965  if (M < 0)
966  continue;
967  unsigned G = M / SegLen; // Input segment of this element.
968  if (Idx == ~0u) {
969  Idx = G;
970  } else if (Idx != G) {
971  Idx = ~1u;
972  break;
973  }
974  }
975  Map[S] = Idx;
976  }
977 
978  return Map;
979 }
980 
982  unsigned SegLen, MutableArrayRef<int> PackedMask) {
984  for (int I = OutSegMap.size() - 1; I >= 0; --I) {
985  unsigned S = OutSegMap[I];
986  assert(S != ~0u && "Unexpected undef");
987  assert(S != ~1u && "Unexpected multi");
988  if (InvMap.size() <= S)
989  InvMap.resize(S+1);
990  InvMap[S] = I;
991  }
992 
993  unsigned Shift = Log2_32(SegLen);
994  for (int I = 0, E = Mask.size(); I != E; ++I) {
995  int M = Mask[I];
996  if (M >= 0) {
997  int OutIdx = InvMap[M >> Shift];
998  M = (M & (SegLen-1)) + SegLen*OutIdx;
999  }
1000  PackedMask[I] = M;
1001  }
1002 }
1003 
1005  // Check by adding all numbers only works if there is no overflow.
1006  assert(Mask.size() < 0x00007FFF && "Overflow failure");
1007  int Sum = 0;
1008  for (int Idx : Mask) {
1009  if (Idx == -1)
1010  return false;
1011  Sum += Idx;
1012  }
1013  int N = Mask.size();
1014  return 2*Sum == N*(N-1);
1015 }
1016 
1017 bool HvxSelector::selectVectorConstants(SDNode *N) {
1018  // Constant vectors are generated as loads from constant pools or as
1019  // splats of a constant value. Since they are generated during the
1020  // selection process, the main selection algorithm is not aware of them.
1021  // Select them directly here.
1022  SmallVector<SDNode*,4> Nodes;
1023  SetVector<SDNode*> WorkQ;
1024 
1025  // The DAG can change (due to CSE) during selection, so cache all the
1026  // unselected nodes first to avoid traversing a mutating DAG.
1027  WorkQ.insert(N);
1028  for (unsigned i = 0; i != WorkQ.size(); ++i) {
1029  SDNode *W = WorkQ[i];
1030  if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1031  Nodes.push_back(W);
1032  for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1033  WorkQ.insert(W->getOperand(j).getNode());
1034  }
1035 
1036  for (SDNode *L : Nodes)
1037  select(L);
1038 
1039  return !Nodes.empty();
1040 }
1041 
1042 void HvxSelector::materialize(const ResultStack &Results) {
1043  DEBUG_WITH_TYPE("isel", {
1044  dbgs() << "Materializing\n";
1045  Results.print(dbgs(), DAG);
1046  });
1047  if (Results.empty())
1048  return;
1049  const SDLoc &dl(Results.InpNode);
1050  std::vector<SDValue> Output;
1051 
1052  for (unsigned I = 0, E = Results.size(); I != E; ++I) {
1053  const NodeTemplate &Node = Results[I];
1054  std::vector<SDValue> Ops;
1055  for (const OpRef &R : Node.Ops) {
1056  assert(R.isValid());
1057  if (R.isValue()) {
1058  Ops.push_back(R.OpV);
1059  continue;
1060  }
1061  if (R.OpN & OpRef::Undef) {
1063  Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1064  continue;
1065  }
1066  // R is an index of a result.
1067  unsigned Part = R.OpN & OpRef::Whole;
1068  int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
1069  if (Idx < 0)
1070  Idx += I;
1071  assert(Idx >= 0 && unsigned(Idx) < Output.size());
1072  SDValue Op = Output[Idx];
1073  MVT OpTy = Op.getValueType().getSimpleVT();
1074  if (Part != OpRef::Whole) {
1075  assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1076  MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
1077  OpTy.getVectorNumElements()/2);
1078  unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1079  : Hexagon::vsub_hi;
1080  Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
1081  }
1082  Ops.push_back(Op);
1083  } // for (Node : Results)
1084 
1085  assert(Node.Ty != MVT::Other);
1086  SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1087  ? Ops.front().getNode()
1088  : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1089  Output.push_back(SDValue(ResN, 0));
1090  }
1091 
1092  SDNode *OutN = Output.back().getNode();
1093  SDNode *InpN = Results.InpNode;
1094  DEBUG_WITH_TYPE("isel", {
1095  dbgs() << "Generated node:\n";
1096  OutN->dumpr(&DAG);
1097  });
1098 
1099  ISel.ReplaceNode(InpN, OutN);
1100  selectVectorConstants(OutN);
1101  DAG.RemoveDeadNodes();
1102 }
1103 
1104 OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
1105  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1106  const SDLoc &dl(Results.InpNode);
1107  Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1108  getConst32(Hexagon::HvxWRRegClassID, dl),
1109  Lo, getConst32(Hexagon::vsub_lo, dl),
1110  Hi, getConst32(Hexagon::vsub_hi, dl),
1111  });
1112  return OpRef::res(Results.top());
1113 }
1114 
1115 // Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1116 // pack these halves into a single vector, and remap SM into NewMask to use
1117 // the new vector instead.
1118 OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1119  ResultStack &Results, MutableArrayRef<int> NewMask,
1120  unsigned Options) {
1121  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1122  if (!Va.isValid() || !Vb.isValid())
1123  return OpRef::fail();
1124 
1125  MVT Ty = getSingleVT(MVT::i8);
1127  OpRef Inp[2] = {Va, Vb};
1128  unsigned VecLen = SM.Mask.size();
1129 
1130  auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1131  ResultStack &Results) {
1132  if (Amt == 0)
1133  return Lo;
1134  const SDLoc &dl(Results.InpNode);
1135  if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1136  bool IsRight = isUInt<3>(Amt); // Right align.
1137  SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1138  unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1139  Results.push(Opc, Ty, {Hi, Lo, S});
1140  return OpRef::res(Results.top());
1141  }
1142  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1143  OpRef A = OpRef::res(Results.top());
1144  Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1145  return OpRef::res(Results.top());
1146  };
1147 
1148  // Segment is a vector half.
1149  unsigned SegLen = HwLen / 2;
1150 
1151  // Check if we can shuffle vector halves around to get the used elements
1152  // into a single vector.
1153  SmallVector<int, 128> MaskH(SM.Mask);
1154  SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1155  unsigned SegCount = SegList.size();
1156  SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1157 
1158  if (SegList.empty())
1159  return OpRef::undef(Ty);
1160 
1161  // NOTE:
1162  // In the following part of the function, where the segments are rearranged,
1163  // the shuffle mask SM can be of any length that is a multiple of a vector
1164  // (i.e. a multiple of 2*SegLen), and non-zero.
1165  // The output segment map is computed, and it may have any even number of
1166  // entries, but the rearrangement of input segments will be done based only
1167  // on the first two (non-undef) entries in the segment map.
1168  // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1169  // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1170  // a single vector V = 3:1. The output mask will then be updated to use
1171  // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1172  //
1173  // Picking the segments based on the output map is an optimization. For
1174  // correctness it is only necessary that Seg0 and Seg1 are the two input
1175  // segments that are used in the output.
1176 
1177  unsigned Seg0 = ~0u, Seg1 = ~0u;
1178  for (int I = 0, E = SegMap.size(); I != E; ++I) {
1179  unsigned X = SegMap[I];
1180  if (X == ~0u)
1181  continue;
1182  if (Seg0 == ~0u)
1183  Seg0 = X;
1184  else if (Seg1 != ~0u)
1185  break;
1186  if (X == ~1u || X != Seg0)
1187  Seg1 = X;
1188  }
1189 
1190  if (SegCount == 1) {
1191  unsigned SrcOp = SegList[0] / 2;
1192  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1193  int M = SM.Mask[I];
1194  if (M >= 0) {
1195  M -= SrcOp * HwLen;
1196  assert(M >= 0);
1197  }
1198  NewMask[I] = M;
1199  }
1200  return Inp[SrcOp];
1201  }
1202 
1203  if (SegCount == 2) {
1204  // Seg0 should not be undef here: this would imply a SegList
1205  // with <= 1 elements, which was checked earlier.
1206  assert(Seg0 != ~0u);
1207 
1208  // If Seg0 or Seg1 are "multi-defined", pick them from the input
1209  // segment list instead.
1210  if (Seg0 == ~1u || Seg1 == ~1u) {
1211  if (Seg0 == Seg1) {
1212  Seg0 = SegList[0];
1213  Seg1 = SegList[1];
1214  } else if (Seg0 == ~1u) {
1215  Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1216  } else {
1217  assert(Seg1 == ~1u);
1218  Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1219  }
1220  }
1221  assert(Seg0 != ~1u && Seg1 != ~1u);
1222 
1223  assert(Seg0 != Seg1 && "Expecting different segments");
1224  const SDLoc &dl(Results.InpNode);
1225  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1226  OpRef HL = OpRef::res(Results.top());
1227 
1228  // Va = AB, Vb = CD
1229 
1230  if (Seg0 / 2 == Seg1 / 2) {
1231  // Same input vector.
1232  Va = Inp[Seg0 / 2];
1233  if (Seg0 > Seg1) {
1234  // Swap halves.
1235  Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1236  Va = OpRef::res(Results.top());
1237  }
1238  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1239  } else if (Seg0 % 2 == Seg1 % 2) {
1240  // Picking AC, BD, CA, or DB.
1241  // vshuff(CD,AB,HL) -> BD:AC
1242  // vshuff(AB,CD,HL) -> DB:CA
1243  auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
1244  : std::make_pair(Va, Vb); // CA or DB
1245  Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1246  OpRef P = OpRef::res(Results.top());
1247  Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1248  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1249  } else {
1250  // Picking AD, BC, CB, or DA.
1251  if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1252  // AD or BC: this can be done using vmux.
1253  // Q = V6_pred_scalar2 SegLen
1254  // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1255  Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1256  OpRef Qt = OpRef::res(Results.top());
1257  auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
1258  : std::make_pair(Vb, Va); // CB
1259  Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1260  Va = OpRef::res(Results.top());
1261  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1262  } else {
1263  // BC or DA: this could be done via valign by SegLen.
1264  // Do nothing here, because valign (if possible) will be generated
1265  // later on (make sure the Seg0 values are as expected).
1266  assert(Seg0 == 1 || Seg0 == 3);
1267  }
1268  }
1269  }
1270 
1271  // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1272 
1273  ShuffleMask SMH(MaskH);
1274  assert(SMH.Mask.size() == VecLen);
1275  SmallVector<int, 128> MaskA(SMH.Mask);
1276 
1277  if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1278  // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1279  SmallVector<int, 128> Swapped(SMH.Mask);
1281  ShuffleMask SW(Swapped);
1282  if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1283  MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1284  std::swap(Va, Vb);
1285  }
1286  }
1287  ShuffleMask SMA(MaskA);
1288  assert(SMA.Mask.size() == VecLen);
1289 
1290  if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1291  int ShiftR = SMA.MinSrc;
1292  if (ShiftR >= static_cast<int>(HwLen)) {
1293  Va = Vb;
1294  Vb = OpRef::undef(Ty);
1295  ShiftR -= HwLen;
1296  }
1297  OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1298 
1299  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1300  int M = SMA.Mask[I];
1301  if (M != -1)
1302  M -= SMA.MinSrc;
1303  NewMask[I] = M;
1304  }
1305  return RetVal;
1306  }
1307 
1308  // By here, packing by segment (half-vector) shuffling, and vector alignment
1309  // failed. Try vmux.
1310  // Note: since this is using the original mask, Va and Vb must not have been
1311  // modified.
1312 
1313  if (Options & PackMux) {
1314  // If elements picked from Va and Vb have all different (source) indexes
1315  // (relative to the start of the argument), do a mux, and update the mask.
1316  BitVector Picked(HwLen);
1317  SmallVector<uint8_t,128> MuxBytes(HwLen);
1318  bool CanMux = true;
1319  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1320  int M = SM.Mask[I];
1321  if (M == -1)
1322  continue;
1323  if (M >= static_cast<int>(HwLen))
1324  M -= HwLen;
1325  else
1326  MuxBytes[M] = 0xFF;
1327  if (Picked[M]) {
1328  CanMux = false;
1329  break;
1330  }
1331  NewMask[I] = M;
1332  }
1333  if (CanMux)
1334  return vmuxs(MuxBytes, Va, Vb, Results);
1335  }
1336  return OpRef::fail();
1337 }
1338 
1339 // Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1340 // pack these vectors into a pair, and remap SM into NewMask to use the
1341 // new pair instead.
1342 OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1343  ResultStack &Results, MutableArrayRef<int> NewMask) {
1344  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1346  if (SegList.empty())
1347  return OpRef::undef(getPairVT(MVT::i8));
1348 
1349  // If more than two halves are used, bail.
1350  // TODO: be more aggressive here?
1351  unsigned SegCount = SegList.size();
1352  if (SegCount > 2)
1353  return OpRef::fail();
1354 
1355  MVT HalfTy = getSingleVT(MVT::i8);
1356 
1357  OpRef Inp[2] = { Va, Vb };
1358  OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1359 
1360  // Really make sure we have at most 2 vectors used in the mask.
1361  assert(SegCount <= 2);
1362 
1363  for (int I = 0, E = SegList.size(); I != E; ++I) {
1364  unsigned S = SegList[I];
1365  OpRef Op = Inp[S / 2];
1366  Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1367  }
1368 
1369  // NOTE: Using SegList as the packing map here (not SegMap). This works,
1370  // because we're not concerned here about the order of the segments (i.e.
1371  // single vectors) in the output pair. Changing the order of vectors is
1372  // free (as opposed to changing the order of vector halves as in packs),
1373  // and so there is no extra cost added in case the order needs to be
1374  // changed later.
1375  packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1376  return concats(Out[0], Out[1], Results);
1377 }
1378 
1379 OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1380  ResultStack &Results) {
1381  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1382  MVT ByteTy = getSingleVT(MVT::i8);
1383  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1384  const SDLoc &dl(Results.InpNode);
1385  SDValue B = getVectorConstant(Bytes, dl);
1386  Results.push(Hexagon::V6_vd0, ByteTy, {});
1387  Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1388  Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1389  return OpRef::res(Results.top());
1390 }
1391 
1392 OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1393  ResultStack &Results) {
1394  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1395  size_t S = Bytes.size() / 2;
1396  OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1397  OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1398  return concats(L, H, Results);
1399 }
1400 
1401 OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1402  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1403  unsigned VecLen = SM.Mask.size();
1404  assert(HwLen == VecLen);
1405  (void)VecLen;
1406  assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1407 
1408  if (isIdentity(SM.Mask))
1409  return Va;
1410  if (isUndef(SM.Mask))
1411  return OpRef::undef(getSingleVT(MVT::i8));
1412 
1413  unsigned HalfLen = HwLen / 2;
1414  assert(isPowerOf2_32(HalfLen));
1415 
1416  // Handle special case where the output is the same half of the input
1417  // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1418  std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1419  if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1420  std::pair<int, unsigned> Strip2 =
1421  findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1422  if (Strip1 == Strip2) {
1423  const SDLoc &dl(Results.InpNode);
1424  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1425  Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1426  {Va, Va, OpRef::res(Results.top())});
1427  OpRef S = OpRef::res(Results.top());
1428  return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1429  }
1430  }
1431 
1432  OpRef P = perfect(SM, Va, Results);
1433  if (P.isValid())
1434  return P;
1435  return butterfly(SM, Va, Results);
1436 }
1437 
1438 OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1439  ResultStack &Results) {
1440  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1441  if (isUndef(SM.Mask))
1442  return OpRef::undef(getSingleVT(MVT::i8));
1443 
1444  OpRef C = contracting(SM, Va, Vb, Results);
1445  if (C.isValid())
1446  return C;
1447 
1448  int VecLen = SM.Mask.size();
1449  SmallVector<int,128> PackedMask(VecLen);
1450  OpRef P = packs(SM, Va, Vb, Results, PackedMask);
1451  if (P.isValid())
1452  return shuffs1(ShuffleMask(PackedMask), P, Results);
1453 
1454  // TODO: Before we split the mask, try perfect shuffle on concatenated
1455  // operands. This won't work now, because the perfect code does not
1456  // tolerate undefs in the mask.
1457 
1458  SmallVector<int,128> MaskL(VecLen), MaskR(VecLen);
1459  splitMask(SM.Mask, MaskL, MaskR);
1460 
1461  OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1462  OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1463  if (!L.isValid() || !R.isValid())
1464  return OpRef::fail();
1465 
1466  SmallVector<uint8_t,128> Bytes(VecLen);
1467  for (int I = 0; I != VecLen; ++I) {
1468  if (MaskL[I] != -1)
1469  Bytes[I] = 0xFF;
1470  }
1471  return vmuxs(Bytes, L, R, Results);
1472 }
1473 
1474 OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1475  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1476  int VecLen = SM.Mask.size();
1477 
1478  if (isIdentity(SM.Mask))
1479  return Va;
1480  if (isUndef(SM.Mask))
1481  return OpRef::undef(getPairVT(MVT::i8));
1482 
1483  SmallVector<int,128> PackedMask(VecLen);
1484  OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1485  if (P.isValid()) {
1486  ShuffleMask PM(PackedMask);
1487  OpRef E = expanding(PM, P, Results);
1488  if (E.isValid())
1489  return E;
1490 
1491  OpRef L = shuffs1(PM.lo(), P, Results);
1492  OpRef H = shuffs1(PM.hi(), P, Results);
1493  if (L.isValid() && H.isValid())
1494  return concats(L, H, Results);
1495  }
1496 
1497  OpRef R = perfect(SM, Va, Results);
1498  if (R.isValid())
1499  return R;
1500  // TODO commute the mask and try the opposite order of the halves.
1501 
1502  OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1503  OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1504  if (L.isValid() && H.isValid())
1505  return concats(L, H, Results);
1506 
1507  return OpRef::fail();
1508 }
1509 
1510 OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1511  ResultStack &Results) {
1512  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1513  if (isUndef(SM.Mask))
1514  return OpRef::undef(getPairVT(MVT::i8));
1515 
1516  int VecLen = SM.Mask.size();
1517  SmallVector<int,256> PackedMask(VecLen);
1518  OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1519  if (P.isValid())
1520  return shuffp1(ShuffleMask(PackedMask), P, Results);
1521 
1522  SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1523  splitMask(SM.Mask, MaskL, MaskR);
1524 
1525  OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1526  OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1527  if (!L.isValid() || !R.isValid())
1528  return OpRef::fail();
1529 
1530  // Mux the results.
1531  SmallVector<uint8_t,256> Bytes(VecLen);
1532  for (int I = 0; I != VecLen; ++I) {
1533  if (MaskL[I] != -1)
1534  Bytes[I] = 0xFF;
1535  }
1536  return vmuxp(Bytes, L, R, Results);
1537 }
1538 
1539 namespace {
1540  struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1541  template <typename T>
1542  Deleter(SelectionDAG &D, T &C)
1543  : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1544  C.erase(N);
1545  }) {}
1546  };
1547 
1548  template <typename T>
1549  struct NullifyingVector : public T {
1551  NullifyingVector(T &&V) : T(V) {
1552  for (unsigned i = 0, e = T::size(); i != e; ++i) {
1553  SDNode *&N = T::operator[](i);
1554  Refs[N] = &N;
1555  }
1556  }
1557  void erase(SDNode *N) {
1558  auto F = Refs.find(N);
1559  if (F != Refs.end())
1560  *F->second = nullptr;
1561  }
1562  };
1563 }
1564 
1565 void HvxSelector::select(SDNode *ISelN) {
1566  // What's important here is to select the right set of nodes. The main
1567  // selection algorithm loops over nodes in a topological order, i.e. users
1568  // are visited before their operands.
1569  //
1570  // It is an error to have an unselected node with a selected operand, and
1571  // there is an assertion in the main selector code to enforce that.
1572  //
1573  // Such a situation could occur if we selected a node, which is both a
1574  // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1575  // node in the DAG.
1576  assert(ISelN->getOpcode() == HexagonISD::ISEL);
1577  SDNode *N0 = ISelN->getOperand(0).getNode();
1578  if (N0->isMachineOpcode()) {
1579  ISel.ReplaceNode(ISelN, N0);
1580  return;
1581  }
1582 
1583  // There could have been nodes created (i.e. inserted into the DAG)
1584  // that are now dead. Remove them, in case they use any of the nodes
1585  // to select (and make them look shared).
1586  DAG.RemoveDeadNodes();
1587 
1588  SetVector<SDNode*> SubNodes, TmpQ;
1589  std::map<SDNode*,unsigned> NumOps;
1590 
1591  // Don't want to select N0 if it's shared with another node, except if
1592  // it's shared with other ISELs.
1593  auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1594  if (llvm::all_of(N0->uses(), IsISelN))
1595  SubNodes.insert(N0);
1596 
1597  auto InSubNodes = [&SubNodes](SDNode *T) { return SubNodes.count(T); };
1598  for (unsigned I = 0; I != SubNodes.size(); ++I) {
1599  SDNode *S = SubNodes[I];
1600  unsigned OpN = 0;
1601  // Only add subnodes that are only reachable from N0.
1602  for (SDValue Op : S->ops()) {
1603  SDNode *O = Op.getNode();
1604  if (llvm::all_of(O->uses(), InSubNodes)) {
1605  SubNodes.insert(O);
1606  ++OpN;
1607  }
1608  }
1609  NumOps.insert({S, OpN});
1610  if (OpN == 0)
1611  TmpQ.insert(S);
1612  }
1613 
1614  for (unsigned I = 0; I != TmpQ.size(); ++I) {
1615  SDNode *S = TmpQ[I];
1616  for (SDNode *U : S->uses()) {
1617  if (U == ISelN)
1618  continue;
1619  auto F = NumOps.find(U);
1620  assert(F != NumOps.end());
1621  if (F->second > 0 && !--F->second)
1622  TmpQ.insert(F->first);
1623  }
1624  }
1625 
1626  // Remove the marker.
1627  ISel.ReplaceNode(ISelN, N0);
1628 
1629  assert(SubNodes.size() == TmpQ.size());
1630  NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1631 
1632  Deleter DUQ(DAG, Queue);
1633  for (SDNode *S : reverse(Queue)) {
1634  if (S == nullptr)
1635  continue;
1636  DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1637  ISel.Select(S);
1638  }
1639 }
1640 
1641 bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1642  MVT ResTy, SDValue Va, SDValue Vb,
1643  SDNode *N) {
1644  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1645  MVT ElemTy = ResTy.getVectorElementType();
1646  assert(ElemTy == MVT::i8);
1647  unsigned VecLen = Mask.size();
1648  bool HavePairs = (2*HwLen == VecLen);
1649  MVT SingleTy = getSingleVT(MVT::i8);
1650 
1651  // The prior attempts to handle this shuffle may have left a bunch of
1652  // dead nodes in the DAG (such as constants). These nodes will be added
1653  // at the end of DAG's node list, which at that point had already been
1654  // sorted topologically. In the main selection loop, the node list is
1655  // traversed backwards from the root node, which means that any new
1656  // nodes (from the end of the list) will not be visited.
1657  // Scalarization will replace the shuffle node with the scalarized
1658  // expression, and if that expression reused any if the leftoever (dead)
1659  // nodes, these nodes would not be selected (since the "local" selection
1660  // only visits nodes that are not in AllNodes).
1661  // To avoid this issue, remove all dead nodes from the DAG now.
1662 // DAG.RemoveDeadNodes();
1663 
1665  LLVMContext &Ctx = *DAG.getContext();
1666  MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1667  for (int I : Mask) {
1668  if (I < 0) {
1669  Ops.push_back(ISel.selectUndef(dl, LegalTy));
1670  continue;
1671  }
1672  SDValue Vec;
1673  unsigned M = I;
1674  if (M < VecLen) {
1675  Vec = Va;
1676  } else {
1677  Vec = Vb;
1678  M -= VecLen;
1679  }
1680  if (HavePairs) {
1681  if (M < HwLen) {
1682  Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1683  } else {
1684  Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1685  M -= HwLen;
1686  }
1687  }
1688  SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1689  SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1690  SDValue L = Lower.LowerOperation(Ex, DAG);
1691  assert(L.getNode());
1692  Ops.push_back(L);
1693  }
1694 
1695  SDValue LV;
1696  if (2*HwLen == VecLen) {
1697  SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1698  SDValue L0 = Lower.LowerOperation(B0, DAG);
1699  SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1700  SDValue L1 = Lower.LowerOperation(B1, DAG);
1701  // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1702  // functions may expect to be called only for illegal operations, so
1703  // make sure that they are not called for legal ones. Develop a better
1704  // mechanism for dealing with this.
1705  LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1706  } else {
1707  SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1708  LV = Lower.LowerOperation(BV, DAG);
1709  }
1710 
1711  assert(!N->use_empty());
1712  SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1713  ISel.ReplaceNode(N, IS.getNode());
1714  select(IS.getNode());
1715  DAG.RemoveDeadNodes();
1716  return true;
1717 }
1718 
1719 OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
1720  ResultStack &Results) {
1721  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1722  if (!Va.isValid() || !Vb.isValid())
1723  return OpRef::fail();
1724 
1725  // Contracting shuffles, i.e. instructions that always discard some bytes
1726  // from the operand vectors.
1727  //
1728  // V6_vshuff{e,o}b
1729  // V6_vdealb4w
1730  // V6_vpack{e,o}{b,h}
1731 
1732  int VecLen = SM.Mask.size();
1733  std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
1734  MVT ResTy = getSingleVT(MVT::i8);
1735 
1736  // The following shuffles only work for bytes and halfwords. This requires
1737  // the strip length to be 1 or 2.
1738  // FIXME: Collecting even/odd elements of any power-of-2 length could be
1739  // done by taking half of a deal operation. This should be handled in
1740  // perfect shuffle generation, but currently that code requires an exact
1741  // mask to work. To work with contracting perfect shuffles, it would need
1742  // to be able to complete an incomplete mask.
1743  // Once that's done, remove the handling of L=4.
1744  if (Strip.second != 1 && Strip.second != 2 && /*FIXME*/Strip.second != 4)
1745  return OpRef::fail();
1746 
1747  // The patterns for the shuffles, in terms of the starting offsets of the
1748  // consecutive strips (L = length of the strip, N = VecLen):
1749  //
1750  // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2
1751  // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2
1752  //
1753  // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2
1754  // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2
1755  //
1756  // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ...
1757 
1758  // The value of the element in the mask following the strip will decide
1759  // what kind of a shuffle this can be.
1760  int NextInMask = SM.Mask[Strip.second];
1761 
1762  // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask
1763  // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would
1764  // satisfy this.
1765  if (NextInMask < VecLen) {
1766  // vpack{e,o} or vdealb4w
1767  if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) {
1768  int N = VecLen;
1769  // Check if this is vdealb4w (L=1).
1770  for (int I = 0; I != N/4; ++I)
1771  if (SM.Mask[I] != 4*I)
1772  return OpRef::fail();
1773  for (int I = 0; I != N/4; ++I)
1774  if (SM.Mask[I+N/4] != 2 + 4*I)
1775  return OpRef::fail();
1776  for (int I = 0; I != N/4; ++I)
1777  if (SM.Mask[I+N/2] != N + 4*I)
1778  return OpRef::fail();
1779  for (int I = 0; I != N/4; ++I)
1780  if (SM.Mask[I+3*N/4] != N+2 + 4*I)
1781  return OpRef::fail();
1782  // Matched mask for vdealb4w.
1783  Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va});
1784  return OpRef::res(Results.top());
1785  }
1786 
1787  // Check if this is vpack{e,o}.
1788  int N = VecLen;
1789  int L = Strip.second;
1790  // Check if the first strip starts at 0 or at L.
1791  if (Strip.first != 0 && Strip.first != L)
1792  return OpRef::fail();
1793  // Examine the rest of the mask.
1794  for (int I = L; I < N; I += L) {
1795  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1796  // Check whether the mask element at the beginning of each strip
1797  // increases by 2L each time.
1798  if (S.first - Strip.first != 2*I)
1799  return OpRef::fail();
1800  // Check whether each strip is of the same length.
1801  if (S.second != unsigned(L))
1802  return OpRef::fail();
1803  }
1804 
1805  // Strip.first == 0 => vpacke
1806  // Strip.first == L => vpacko
1807  assert(Strip.first == 0 || Strip.first == L);
1808  using namespace Hexagon;
1809  NodeTemplate Res;
1810  // FIXME: remove L=4 case after adding perfect mask completion.
1811  if (L == 4) {
1812  const SDLoc &dl(Results.InpNode);
1813  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(-L, dl)});
1814  OpRef C = OpRef::res(Results.top());
1815  MVT JoinTy = MVT::getVectorVT(ResTy.getVectorElementType(),
1816  2 * ResTy.getVectorNumElements());
1817  Results.push(Hexagon::V6_vdealvdd, JoinTy, {Vb, Va, C});
1818  return Strip.first == 0 ? OpRef::lo(OpRef::res(Results.top()))
1819  : OpRef::hi(OpRef::res(Results.top()));
1820  }
1821  Res.Opc = Strip.second == 1 // Number of bytes.
1822  ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob)
1823  : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh);
1824  Res.Ty = ResTy;
1825  Res.Ops = { Vb, Va };
1826  Results.push(Res);
1827  return OpRef::res(Results.top());
1828  }
1829 
1830  // Check if this is vshuff{e,o}.
1831  int N = VecLen;
1832  int L = Strip.second;
1833  std::pair<int,unsigned> PrevS = Strip;
1834  bool Flip = false;
1835  for (int I = L; I < N; I += L) {
1836  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1837  if (S.second != PrevS.second)
1838  return OpRef::fail();
1839  int Diff = Flip ? PrevS.first - S.first + 2*L
1840  : S.first - PrevS.first;
1841  if (Diff != N)
1842  return OpRef::fail();
1843  Flip ^= true;
1844  PrevS = S;
1845  }
1846  // Strip.first == 0 => vshuffe
1847  // Strip.first == L => vshuffo
1848  assert(Strip.first == 0 || Strip.first == L);
1849  using namespace Hexagon;
1850  NodeTemplate Res;
1851  Res.Opc = Strip.second == 1 // Number of bytes.
1852  ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob)
1853  : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh);
1854  Res.Ty = ResTy;
1855  Res.Ops = { Vb, Va };
1856  Results.push(Res);
1857  return OpRef::res(Results.top());
1858 }
1859 
1860 OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1861  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1862  // Expanding shuffles (using all elements and inserting into larger vector):
1863  //
1864  // V6_vunpacku{b,h} [*]
1865  //
1866  // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
1867  //
1868  // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
1869  // they are not shuffles.
1870  //
1871  // The argument is a single vector.
1872 
1873  int VecLen = SM.Mask.size();
1874  assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
1875 
1876  std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
1877 
1878  // The patterns for the unpacks, in terms of the starting offsets of the
1879  // consecutive strips (L = length of the strip, N = VecLen):
1880  //
1881  // vunpacku: 0, -1, L, -1, 2L, -1 ...
1882 
1883  if (Strip.first != 0)
1884  return OpRef::fail();
1885 
1886  // The vunpackus only handle byte and half-word.
1887  if (Strip.second != 1 && Strip.second != 2)
1888  return OpRef::fail();
1889 
1890  int N = VecLen;
1891  int L = Strip.second;
1892 
1893  // First, check the non-ignored strips.
1894  for (int I = 2*L; I < N; I += 2*L) {
1895  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1896  if (S.second != unsigned(L))
1897  return OpRef::fail();
1898  if (2*S.first != I)
1899  return OpRef::fail();
1900  }
1901  // Check the -1s.
1902  for (int I = L; I < N; I += 2*L) {
1903  auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
1904  if (S.first != -1 || S.second != unsigned(L))
1905  return OpRef::fail();
1906  }
1907 
1908  unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
1909  : Hexagon::V6_vunpackuh;
1910  Results.push(Opc, getPairVT(MVT::i8), {Va});
1911  return OpRef::res(Results.top());
1912 }
1913 
1914 OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1915  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1916  // V6_vdeal{b,h}
1917  // V6_vshuff{b,h}
1918 
1919  // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
1920  // V6_vshuffvdd (V6_vshuff)
1921  // V6_dealvdd (V6_vdeal)
1922 
1923  int VecLen = SM.Mask.size();
1924  assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
1925  unsigned LogLen = Log2_32(VecLen);
1926  unsigned HwLog = Log2_32(HwLen);
1927  // The result length must be the same as the length of a single vector,
1928  // or a vector pair.
1929  assert(LogLen == HwLog || LogLen == HwLog+1);
1930  bool HavePairs = LogLen == HwLog+1;
1931 
1932  if (!isPermutation(SM.Mask))
1933  return OpRef::fail();
1934 
1935  SmallVector<unsigned,8> Perm(LogLen);
1936 
1937  // Check if this could be a perfect shuffle, or a combination of perfect
1938  // shuffles.
1939  //
1940  // Consider this permutation (using hex digits to make the ASCII diagrams
1941  // easier to read):
1942  // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
1943  // This is a "deal" operation: divide the input into two halves, and
1944  // create the output by picking elements by alternating between these two
1945  // halves:
1946  // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
1947  // 8 9 A B C D E F
1948  //
1949  // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
1950  // a somwehat different mechanism that could be used to perform shuffle/
1951  // deal operations: a 2x2 transpose.
1952  // Consider the halves of inputs again, they can be interpreted as a 2x8
1953  // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
1954  // together. Now, when considering 2 elements at a time, it will be a 2x4
1955  // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
1956  // 01 23 45 67
1957  // 89 AB CD EF
1958  // With groups of 4, this will become a single 2x2 matrix, and so on.
1959  //
1960  // The 2x2 transpose instruction works by transposing each of the 2x2
1961  // matrices (or "sub-matrices"), given a specific group size. For example,
1962  // if the group size is 1 (i.e. each element is its own group), there
1963  // will be four transposes of the four 2x2 matrices that form the 2x8.
1964  // For example, with the inputs as above, the result will be:
1965  // 0 8 2 A 4 C 6 E
1966  // 1 9 3 B 5 D 7 F
1967  // Now, this result can be tranposed again, but with the group size of 2:
1968  // 08 19 4C 5D
1969  // 2A 3B 6E 7F
1970  // If we then transpose that result, but with the group size of 4, we get:
1971  // 0819 2A3B
1972  // 4C5D 6E7F
1973  // If we concatenate these two rows, it will be
1974  // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
1975  // which is the same as the "deal" [*] above.
1976  //
1977  // In general, a "deal" of individual elements is a series of 2x2 transposes,
1978  // with changing group size. HVX has two instructions:
1979  // Vdd = V6_vdealvdd Vu, Vv, Rt
1980  // Vdd = V6_shufvdd Vu, Vv, Rt
1981  // that perform exactly that. The register Rt controls which transposes are
1982  // going to happen: a bit at position n (counting from 0) indicates that a
1983  // transpose with a group size of 2^n will take place. If multiple bits are
1984  // set, multiple transposes will happen: vdealvdd will perform them starting
1985  // with the largest group size, vshuffvdd will do them in the reverse order.
1986  //
1987  // The main observation is that each 2x2 transpose corresponds to swapping
1988  // columns of bits in the binary representation of the values.
1989  //
1990  // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
1991  // in a given column. The * denote the columns that will be swapped.
1992  // The transpose with the group size 2^n corresponds to swapping columns
1993  // 3 (the highest log) and log2(n):
1994  //
1995  // 3 2 1 0 0 2 1 3 0 2 3 1
1996  // * * * * * *
1997  // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1998  // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
1999  // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
2000  // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
2001  // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
2002  // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
2003  // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
2004  // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
2005  // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
2006  // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
2007  // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
2008  // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
2009  // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
2010  // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
2011  // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
2012  // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
2013 
2014  // There is one special case that is not a perfect shuffle, but
2015  // can be turned into one easily: when the shuffle operates on
2016  // a vector pair, but the two vectors in the pair are swapped.
2017  // The code below that identifies perfect shuffles will reject
2018  // it, unless the order is reversed.
2019  SmallVector<int, 128> MaskStorage(SM.Mask);
2020  bool InvertedPair = false;
2021  if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2022  for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2023  int M = SM.Mask[i];
2024  MaskStorage[i] = M >= int(HwLen) ? M-HwLen : M+HwLen;
2025  }
2026  InvertedPair = true;
2027  }
2028  ArrayRef<int> LocalMask(MaskStorage);
2029 
2030  auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) {
2031  unsigned X = Mask[0] ^ Mask[Num/2];
2032  // Check that the first half has the X's bits clear.
2033  if ((Mask[0] & X) != 0)
2034  return 0u;
2035  for (unsigned I = 1; I != Num/2; ++I) {
2036  if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X)
2037  return 0u;
2038  if ((Mask[I] & X) != 0)
2039  return 0u;
2040  }
2041  return X;
2042  };
2043 
2044  // Create a vector of log2's for each column: Perm[i] corresponds to
2045  // the i-th bit (lsb is 0).
2046  assert(VecLen > 2);
2047  for (unsigned I = VecLen; I >= 2; I >>= 1) {
2048  // Examine the initial segment of Mask of size I.
2049  unsigned X = XorPow2(LocalMask, I);
2050  if (!isPowerOf2_32(X))
2051  return OpRef::fail();
2052  // Check the other segments of Mask.
2053  for (int J = I; J < VecLen; J += I) {
2054  if (XorPow2(LocalMask.slice(J, I), I) != X)
2055  return OpRef::fail();
2056  }
2057  Perm[Log2_32(X)] = Log2_32(I)-1;
2058  }
2059 
2060  // Once we have Perm, represent it as cycles. Denote the maximum log2
2061  // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2062  // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2063  // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2064  // order being from left to right. Any (contiguous) segment where the
2065  // values ai, ai+1...aj are either all increasing or all decreasing,
2066  // can be implemented via a single vshuffvdd/vdealvdd respectively.
2067  //
2068  // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2069  // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2070  // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2071  // can be used to generate a sequence of vshuffvdd/vdealvdd.
2072  //
2073  // Example:
2074  // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2075  // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2076  // (4 0 1)(4 0)(4 2 3)(4 2).
2077  // It can then be expanded into swaps as
2078  // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2079  // and broken up into "increasing" segments as
2080  // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2081  // This is equivalent to
2082  // (4 0 1)(4 0 2 3)(4 2),
2083  // which can be implemented as 3 vshufvdd instructions.
2084 
2085  using CycleType = SmallVector<unsigned,8>;
2086  std::set<CycleType> Cycles;
2087  std::set<unsigned> All;
2088 
2089  for (unsigned I : Perm)
2090  All.insert(I);
2091 
2092  // If the cycle contains LogLen-1, move it to the front of the cycle.
2093  // Otherwise, return the cycle unchanged.
2094  auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2095  unsigned LogPos, N = C.size();
2096  for (LogPos = 0; LogPos != N; ++LogPos)
2097  if (C[LogPos] == LogLen-1)
2098  break;
2099  if (LogPos == N)
2100  return C;
2101 
2102  CycleType NewC(C.begin()+LogPos, C.end());
2103  NewC.append(C.begin(), C.begin()+LogPos);
2104  return NewC;
2105  };
2106 
2107  auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
2108  // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2109  // for bytes zero is included, for halfwords is not.
2110  if (Cs.size() != 1)
2111  return 0u;
2112  const CycleType &C = *Cs.begin();
2113  if (C[0] != Len-1)
2114  return 0u;
2115  int D = Len - C.size();
2116  if (D != 0 && D != 1)
2117  return 0u;
2118 
2119  bool IsDeal = true, IsShuff = true;
2120  for (unsigned I = 1; I != Len-D; ++I) {
2121  if (C[I] != Len-1-I)
2122  IsDeal = false;
2123  if (C[I] != I-(1-D)) // I-1, I
2124  IsShuff = false;
2125  }
2126  // At most one, IsDeal or IsShuff, can be non-zero.
2127  assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2128  static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh };
2129  static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh };
2130  return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
2131  };
2132 
2133  while (!All.empty()) {
2134  unsigned A = *All.begin();
2135  All.erase(A);
2136  CycleType C;
2137  C.push_back(A);
2138  for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
2139  C.push_back(B);
2140  All.erase(B);
2141  }
2142  if (C.size() <= 1)
2143  continue;
2144  Cycles.insert(canonicalize(C));
2145  }
2146 
2147  MVT SingleTy = getSingleVT(MVT::i8);
2149 
2150  // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2151  if (unsigned(VecLen) == HwLen) {
2152  if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
2153  Results.push(SingleOpc, SingleTy, {Va});
2154  return OpRef::res(Results.top());
2155  }
2156  }
2157 
2158  // From the cycles, construct the sequence of values that will
2159  // then form the control values for vdealvdd/vshuffvdd, i.e.
2160  // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2161  // This essentially strips the M value from the cycles where
2162  // it's present, and performs the insertion of M (then stripping)
2163  // for cycles without M (as described in an earlier comment).
2164  SmallVector<unsigned,8> SwapElems;
2165  // When the input is extended (i.e. single vector becomes a pair),
2166  // this is done by using an "undef" vector as the second input.
2167  // However, then we get
2168  // input 1: GOODBITS
2169  // input 2: ........
2170  // but we need
2171  // input 1: ....BITS
2172  // input 2: ....GOOD
2173  // Then at the end, this needs to be undone. To accomplish this,
2174  // artificially add "LogLen-1" at both ends of the sequence.
2175  if (!HavePairs)
2176  SwapElems.push_back(LogLen-1);
2177  for (const CycleType &C : Cycles) {
2178  // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2179  unsigned First = (C[0] == LogLen-1) ? 1 : 0;
2180  SwapElems.append(C.begin()+First, C.end());
2181  if (First == 0)
2182  SwapElems.push_back(C[0]);
2183  }
2184  if (!HavePairs)
2185  SwapElems.push_back(LogLen-1);
2186 
2187  const SDLoc &dl(Results.InpNode);
2188  OpRef Arg = HavePairs ? Va
2189  : concats(Va, OpRef::undef(SingleTy), Results);
2190  if (InvertedPair)
2191  Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
2192 
2193  for (unsigned I = 0, E = SwapElems.size(); I != E; ) {
2194  bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1];
2195  unsigned S = (1u << SwapElems[I]);
2196  if (I < E-1) {
2197  while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1]))
2198  S |= 1u << SwapElems[I];
2199  // The above loop will not add a bit for the final SwapElems[I+1],
2200  // so add it here.
2201  S |= 1u << SwapElems[I];
2202  }
2203  ++I;
2204 
2205  NodeTemplate Res;
2206  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2207  Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2208  Res.Ty = PairTy;
2209  Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) };
2210  Results.push(Res);
2211  Arg = OpRef::res(Results.top());
2212  }
2213 
2214  return HavePairs ? Arg : OpRef::lo(Arg);
2215 }
2216 
2217 OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2218  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2219  // Butterfly shuffles.
2220  //
2221  // V6_vdelta
2222  // V6_vrdelta
2223  // V6_vror
2224 
2225  // The assumption here is that all elements picked by Mask are in the
2226  // first operand to the vector_shuffle. This assumption is enforced
2227  // by the caller.
2228 
2229  MVT ResTy = getSingleVT(MVT::i8);
2230  PermNetwork::Controls FC, RC;
2231  const SDLoc &dl(Results.InpNode);
2232  int VecLen = SM.Mask.size();
2233 
2234  for (int M : SM.Mask) {
2235  if (M != -1 && M >= VecLen)
2236  return OpRef::fail();
2237  }
2238 
2239  // Try the deltas/benes for both single vectors and vector pairs.
2240  ForwardDeltaNetwork FN(SM.Mask);
2241  if (FN.run(FC)) {
2242  SDValue Ctl = getVectorConstant(FC, dl);
2243  Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2244  return OpRef::res(Results.top());
2245  }
2246 
2247  // Try reverse delta.
2248  ReverseDeltaNetwork RN(SM.Mask);
2249  if (RN.run(RC)) {
2250  SDValue Ctl = getVectorConstant(RC, dl);
2251  Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2252  return OpRef::res(Results.top());
2253  }
2254 
2255  // Do Benes.
2256  BenesNetwork BN(SM.Mask);
2257  if (BN.run(FC, RC)) {
2258  SDValue CtlF = getVectorConstant(FC, dl);
2259  SDValue CtlR = getVectorConstant(RC, dl);
2260  Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2261  Results.push(Hexagon::V6_vrdelta, ResTy,
2262  {OpRef::res(-1), OpRef(CtlR)});
2263  return OpRef::res(Results.top());
2264  }
2265 
2266  return OpRef::fail();
2267 }
2268 
2269 SDValue HvxSelector::getConst32(int Val, const SDLoc &dl) {
2270  return DAG.getTargetConstant(Val, dl, MVT::i32);
2271 }
2272 
2273 SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2274  const SDLoc &dl) {
2276  for (uint8_t C : Data)
2277  Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2278  MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2279  SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2280  SDValue LV = Lower.LowerOperation(BV, DAG);
2281  DAG.RemoveDeadNode(BV.getNode());
2282  return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2283 }
2284 
2286  SDValue Inp = N->getOperand(0);
2287  MVT ResTy = N->getValueType(0).getSimpleVT();
2288  auto IdxN = cast<ConstantSDNode>(N->getOperand(1));
2289  unsigned Idx = IdxN->getZExtValue();
2290 #ifndef NDEBUG
2291  MVT InpTy = Inp.getValueType().getSimpleVT();
2292  assert(InpTy.getVectorElementType() == ResTy.getVectorElementType());
2293  unsigned ResLen = ResTy.getVectorNumElements();
2294  assert(2 * ResLen == InpTy.getVectorNumElements());
2295  assert(Idx == 0 || Idx == ResLen);
2296 #endif
2297  unsigned SubReg = Idx == 0 ? Hexagon::vsub_lo : Hexagon::vsub_hi;
2298  SDValue Ext = DAG.getTargetExtractSubreg(SubReg, SDLoc(N), ResTy, Inp);
2299 
2300  ISel.ReplaceNode(N, Ext.getNode());
2301 }
2302 
2304  DEBUG_WITH_TYPE("isel", {
2305  dbgs() << "Starting " << __func__ << " on node:\n";
2306  N->dump(&DAG);
2307  });
2308  MVT ResTy = N->getValueType(0).getSimpleVT();
2309  // Assume that vector shuffles operate on vectors of bytes.
2310  assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2311 
2312  auto *SN = cast<ShuffleVectorSDNode>(N);
2313  std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2314  // This shouldn't really be necessary. Is it?
2315  for (int &Idx : Mask)
2316  if (Idx != -1 && Idx < 0)
2317  Idx = -1;
2318 
2319  unsigned VecLen = Mask.size();
2320  bool HavePairs = (2*HwLen == VecLen);
2321  assert(ResTy.getSizeInBits() / 8 == VecLen);
2322 
2323  // Vd = vector_shuffle Va, Vb, Mask
2324  //
2325 
2326  bool UseLeft = false, UseRight = false;
2327  for (unsigned I = 0; I != VecLen; ++I) {
2328  if (Mask[I] == -1)
2329  continue;
2330  unsigned Idx = Mask[I];
2331  assert(Idx < 2*VecLen);
2332  if (Idx < VecLen)
2333  UseLeft = true;
2334  else
2335  UseRight = true;
2336  }
2337 
2338  DEBUG_WITH_TYPE("isel", {
2339  dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2340  << UseLeft << " UseRight=" << UseRight << " HavePairs="
2341  << HavePairs << '\n';
2342  });
2343  // If the mask is all -1's, generate "undef".
2344  if (!UseLeft && !UseRight) {
2345  ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2346  return;
2347  }
2348 
2349  SDValue Vec0 = N->getOperand(0);
2350  SDValue Vec1 = N->getOperand(1);
2351  ResultStack Results(SN);
2352  Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2353  Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2354  OpRef Va = OpRef::res(Results.top()-1);
2355  OpRef Vb = OpRef::res(Results.top());
2356 
2357  OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2358  : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2359 
2360  bool Done = Res.isValid();
2361  if (Done) {
2362  // Make sure that Res is on the stack before materializing.
2363  Results.push(TargetOpcode::COPY, ResTy, {Res});
2364  materialize(Results);
2365  } else {
2366  Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2367  }
2368 
2369  if (!Done) {
2370 #ifndef NDEBUG
2371  dbgs() << "Unhandled shuffle:\n";
2372  SN->dumpr(&DAG);
2373 #endif
2374  llvm_unreachable("Failed to select vector shuffle");
2375  }
2376 }
2377 
2379  // If this is a rotation by less than 8, use V6_valignbi.
2380  MVT Ty = N->getValueType(0).getSimpleVT();
2381  const SDLoc &dl(N);
2382  SDValue VecV = N->getOperand(0);
2383  SDValue RotV = N->getOperand(1);
2384  SDNode *NewN = nullptr;
2385 
2386  if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2387  unsigned S = CN->getZExtValue() % HST.getVectorLength();
2388  if (S == 0) {
2389  NewN = VecV.getNode();
2390  } else if (isUInt<3>(S)) {
2391  NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2392  {VecV, VecV, getConst32(S, dl)});
2393  }
2394  }
2395 
2396  if (!NewN)
2397  NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2398 
2399  ISel.ReplaceNode(N, NewN);
2400 }
2401 
2403  SDValue Vv = N->getOperand(0);
2404  SDValue Vu = N->getOperand(1);
2405  SDValue Rt = N->getOperand(2);
2406  SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2407  N->getValueType(0), {Vv, Vu, Rt});
2408  ISel.ReplaceNode(N, NewN);
2409  DAG.RemoveDeadNode(N);
2410 }
2411 
2412 void HexagonDAGToDAGISel::SelectHvxExtractSubvector(SDNode *N) {
2413  HvxSelector(*this, *CurDAG).selectExtractSubvector(N);
2414 }
2415 
2416 void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2417  HvxSelector(*this, *CurDAG).selectShuffle(N);
2418 }
2419 
2420 void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2421  HvxSelector(*this, *CurDAG).selectRor(N);
2422 }
2423 
2424 void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2425  HvxSelector(*this, *CurDAG).selectVAlign(N);
2426 }
2427 
2429  const SDLoc &dl(N);
2430  SDValue Chain = N->getOperand(0);
2431  SDValue Address = N->getOperand(2);
2432  SDValue Predicate = N->getOperand(3);
2433  SDValue Base = N->getOperand(4);
2434  SDValue Modifier = N->getOperand(5);
2435  SDValue Offset = N->getOperand(6);
2436  SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2437 
2438  unsigned Opcode;
2439  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2440  switch (IntNo) {
2441  default:
2442  llvm_unreachable("Unexpected HVX gather intrinsic.");
2443  case Intrinsic::hexagon_V6_vgathermhq:
2444  case Intrinsic::hexagon_V6_vgathermhq_128B:
2445  Opcode = Hexagon::V6_vgathermhq_pseudo;
2446  break;
2447  case Intrinsic::hexagon_V6_vgathermwq:
2448  case Intrinsic::hexagon_V6_vgathermwq_128B:
2449  Opcode = Hexagon::V6_vgathermwq_pseudo;
2450  break;
2451  case Intrinsic::hexagon_V6_vgathermhwq:
2452  case Intrinsic::hexagon_V6_vgathermhwq_128B:
2453  Opcode = Hexagon::V6_vgathermhwq_pseudo;
2454  break;
2455  }
2456 
2457  SDVTList VTs = CurDAG->getVTList(MVT::Other);
2458  SDValue Ops[] = { Address, ImmOperand,
2459  Predicate, Base, Modifier, Offset, Chain };
2460  SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2461 
2462  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2463  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2464 
2465  ReplaceNode(N, Result);
2466 }
2467 
2469  const SDLoc &dl(N);
2470  SDValue Chain = N->getOperand(0);
2471  SDValue Address = N->getOperand(2);
2472  SDValue Base = N->getOperand(3);
2473  SDValue Modifier = N->getOperand(4);
2474  SDValue Offset = N->getOperand(5);
2475  SDValue ImmOperand = CurDAG->getTargetConstant(0, dl, MVT::i32);
2476 
2477  unsigned Opcode;
2478  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2479  switch (IntNo) {
2480  default:
2481  llvm_unreachable("Unexpected HVX gather intrinsic.");
2482  case Intrinsic::hexagon_V6_vgathermh:
2483  case Intrinsic::hexagon_V6_vgathermh_128B:
2484  Opcode = Hexagon::V6_vgathermh_pseudo;
2485  break;
2486  case Intrinsic::hexagon_V6_vgathermw:
2487  case Intrinsic::hexagon_V6_vgathermw_128B:
2488  Opcode = Hexagon::V6_vgathermw_pseudo;
2489  break;
2490  case Intrinsic::hexagon_V6_vgathermhw:
2491  case Intrinsic::hexagon_V6_vgathermhw_128B:
2492  Opcode = Hexagon::V6_vgathermhw_pseudo;
2493  break;
2494  }
2495 
2496  SDVTList VTs = CurDAG->getVTList(MVT::Other);
2497  SDValue Ops[] = { Address, ImmOperand, Base, Modifier, Offset, Chain };
2498  SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2499 
2500  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2501  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2502 
2503  ReplaceNode(N, Result);
2504 }
2505 
2507  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2508  SDNode *Result;
2509  switch (IID) {
2510  case Intrinsic::hexagon_V6_vaddcarry: {
2511  std::array<SDValue, 3> Ops = {
2512  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2513  SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2514  Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2515  break;
2516  }
2517  case Intrinsic::hexagon_V6_vaddcarry_128B: {
2518  std::array<SDValue, 3> Ops = {
2519  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2520  SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2521  Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2522  break;
2523  }
2524  case Intrinsic::hexagon_V6_vsubcarry: {
2525  std::array<SDValue, 3> Ops = {
2526  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2527  SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2528  Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2529  break;
2530  }
2531  case Intrinsic::hexagon_V6_vsubcarry_128B: {
2532  std::array<SDValue, 3> Ops = {
2533  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2534  SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2535  Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2536  break;
2537  }
2538  default:
2539  llvm_unreachable("Unexpected HVX dual output intrinsic.");
2540  }
2541  ReplaceUses(N, Result);
2542  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
2543  ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
2544  CurDAG->RemoveDeadNode(N);
2545 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:156
llvm::HexagonSubtarget::getVectorLength
unsigned getVectorLength() const
Definition: HexagonSubtarget.h:313
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:533
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MVT::v128i1
@ v128i1
Definition: MachineValueType.h:73
Pass
print lazy value Lazy Value Info Printer Pass
Definition: LazyValueInfo.cpp:1999
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1106
llvm::HvxSelector::Lower
const HexagonTargetLowering & Lower
Definition: HexagonISelDAGToDAGHVX.cpp:809
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:189
llvm::TargetLoweringBase::getTypeToTransformTo
virtual EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:1002
llvm::NodeSet::end
iterator end()
Definition: MachinePipeliner.h:432
llvm::Function::empty
bool empty() const
Definition: Function.h:713
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:159
llvm::Function::print
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4571
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:542
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:344
isIdentity
static bool isIdentity(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:917
getHexagonLowering
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:800
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::HvxSelector::HwLen
const unsigned HwLen
Definition: HexagonISelDAGToDAGHVX.cpp:813
Ignore
ReachingDefAnalysis InstSet InstSet & Ignore
Definition: ARMLowOverheadLoops.cpp:547
llvm::SmallVector< unsigned, 4 >
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:266
llvm::HexagonTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: HexagonISelLowering.cpp:3307
llvm::HvxSelector::selectRor
void selectRor(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2378
PairTy
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Definition: MachineModuleInfoImpls.cpp:30
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::HvxSelector::selectShuffle
void selectShuffle(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2303
llvm::BitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:133
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
push
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
Definition: BitstreamRemarkSerializer.cpp:24
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:463
llvm::MachO::Invalid
@ Invalid
Invalid file type.
Definition: InterfaceFile.h:55
llvm::MemOp
Definition: TargetLowering.h:111
llvm::coro::ABI::Switch
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
Shift
bool Shift
Definition: README.txt:468
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:235
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:226
T
#define T
Definition: Mips16ISelLowering.cpp:341
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:772
llvm::HexagonDAGToDAGISel
Definition: HexagonISelDAGToDAG.h:29
llvm::max
Expected< ExpressionValue > max(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:337
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:943
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:877
HexagonTargetMachine.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
getHexagonSubtarget
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:803
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:172
BaseType
Coloring
Stack Slot Coloring
Definition: StackSlotColoring.cpp:139
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:479
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
llvm::ArrayRef::data
const T * data() const
Definition: ArrayRef.h:161
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:265
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:910
HexagonISelDAGToDAG.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
CommandLine.h
getInputSegmentList
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
Definition: HexagonISelDAGToDAGHVX.cpp:926
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1709
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:644
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:98
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1141
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:28
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:220
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::HexagonISD::ISEL
@ ISEL
Definition: HexagonISelLowering.h:116
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:34
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1514
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:332
llvm::MVT::v64i1
@ v64i1
Definition: MachineValueType.h:72
llvm::AArch64CC::HS
@ HS
Definition: AArch64BaseInfo.h:257
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:231
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:125
llvm::dwarf::Index
Index
Definition: Dwarf.h:472
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:547
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:806
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::SelectionDAG::DAGNodeDeletedListener
Definition: SelectionDAG.h:330
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:642
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:291
llvm::BitVector
Definition: BitVector.h:75
llvm::SmallVectorImpl::append
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:687
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:321
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::omp::RTLDependInfoFields::Len
@ Len
Hexagon.h
llvm::HvxSelector
Definition: HexagonISelDAGToDAGHVX.cpp:808
llvm::dxil::PointerTypeAnalysis::run
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
Definition: PointerTypeAnalysis.cpp:189
llvm::NodeSet::begin
iterator begin()
Definition: MachinePipeliner.h:431
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:200
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:264
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::SignExtend32
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:702
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:159
getOutputSegmentMap
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
Definition: HexagonISelDAGToDAGHVX.cpp:946
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:997
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:203
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::HexagonDAGToDAGISel::SelectV65GatherPred
void SelectV65GatherPred(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2428
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:53
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
fail
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
Definition: BPFISelLowering.cpp:38
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:921
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:9047
size
i< reg-> size
Definition: README.txt:166
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:880
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:46
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::HvxSelector::HST
const HexagonSubtarget & HST
Definition: HexagonISelDAGToDAGHVX.cpp:812
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:894
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:9791
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::SDNode::dumpr
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
Definition: SelectionDAGDumper.cpp:1008
HexagonISelLowering.h
packSegmentMask
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
Definition: HexagonISelDAGToDAGHVX.cpp:981
llvm::SDNode::isMachineOpcode
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
Definition: SelectionDAGNodes.h:699
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::print
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr)
Definition: GCNRegPressure.cpp:138
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition: MachineValueType.h:1230
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1690
SelectionDAGISel.h
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:143
llvm::HexagonTargetLowering
Definition: HexagonISelLowering.h:126
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::HvxSelector::ISel
HexagonDAGToDAGISel & ISel
Definition: HexagonISelDAGToDAGHVX.cpp:810
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:118
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:822
llvm::object::Comma
@ Comma
Definition: COFFModuleDefinition.cpp:35
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:117
llvm::MachO::All
@ All
Definition: InterfaceFile.h:69
j
return j(j<< 16)
llvm::MipsISD::Mult
@ Mult
Definition: MipsISelLowering.h:135
findStrip
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
Definition: HexagonISelDAGToDAGHVX.cpp:897
H
#define H(x, y, z)
Definition: MD5.cpp:57
llvm::None
constexpr std::nullopt_t None
Definition: None.h:27
llvm::HvxSelector::getPairVT
MVT getPairVT(MVT ElemTy) const
Definition: HexagonISelDAGToDAGHVX.cpp:825
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:84
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:348
llvm::ArrayRef::take_front
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:227
llvm::HexagonDAGToDAGISel::SelectV65Gather
void SelectV65Gather(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2468
llvm::c_str
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Definition: WindowsSupport.h:193
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9909
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:48
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:145
isPermutation
static bool isPermutation(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:1004
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:184
llvm::HvxSelector::selectVAlign
void selectVAlign(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2402
MachineInstrBuilder.h
llvm::HvxSelector::DAG
SelectionDAG & DAG
Definition: HexagonISelDAGToDAGHVX.cpp:811
List
const NodeList & List
Definition: RDFGraph.cpp:199
N
#define N
splitMask
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
Definition: HexagonISelDAGToDAGHVX.cpp:879
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:164
llvm::HvxSelector::HvxSelector
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:815
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
llvm::HexagonDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: HexagonISelDAGToDAG.cpp:902
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:485
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:669
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:355
RN
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s src2 rnd ← FPSCR RN
Definition: README_P9.txt:262
llvm::HvxSelector::getSingleVT
MVT getSingleVT(MVT ElemTy) const
Definition: HexagonISelDAGToDAGHVX.cpp:819
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::ShuffleVectorSDNode::commuteMask
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
Definition: SelectionDAGNodes.h:1564
Debug.h
SubReg
unsigned SubReg
Definition: AArch64AdvSIMDScalarPass.cpp:104
llvm::SrcOp
Definition: MachineIRBuilder.h:128
SetVector.h
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::HvxSelector::selectExtractSubvector
void selectExtractSubvector(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2285
llvm::HexagonDAGToDAGISel::SelectHVXDualOutput
void SelectHVXDualOutput(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2506
llvm::HvxSelector::getBoolVT
MVT getBoolVT() const
Definition: HexagonISelDAGToDAGHVX.cpp:831
llvm::Function::size
size_t size() const
Definition: Function.h:712
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:288
llvm::irsymtab::build
Error build(ArrayRef< Module * > Mods, SmallVector< char, 0 > &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc)
Fills in Symtab and StrtabBuilder with a valid symbol and string table for Mods.
Definition: IRSymtab.cpp:365