LLVM  14.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Hexagon.h"
10 #include "HexagonISelDAGToDAG.h"
11 #include "HexagonISelLowering.h"
12 #include "HexagonTargetMachine.h"
13 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/Intrinsics.h"
17 #include "llvm/IR/IntrinsicsHexagon.h"
19 #include "llvm/Support/Debug.h"
20 
21 #include <deque>
22 #include <map>
23 #include <set>
24 #include <utility>
25 #include <vector>
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 
29 using namespace llvm;
30 
31 namespace {
32 
33 // --------------------------------------------------------------------
34 // Implementation of permutation networks.
35 
36 // Implementation of the node routing through butterfly networks:
37 // - Forward delta.
38 // - Reverse delta.
39 // - Benes.
40 //
41 //
42 // Forward delta network consists of log(N) steps, where N is the number
43 // of inputs. In each step, an input can stay in place, or it can get
44 // routed to another position[1]. The step after that consists of two
45 // networks, each half in size in terms of the number of nodes. In those
46 // terms, in the given step, an input can go to either the upper or the
47 // lower network in the next step.
48 //
49 // [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
50 // positions as long as there is no conflict.
51 
52 // Here's a delta network for 8 inputs, only the switching routes are
53 // shown:
54 //
55 // Steps:
56 // |- 1 ---------------|- 2 -----|- 3 -|
57 //
58 // Inp[0] *** *** *** *** Out[0]
59 // \ / \ / \ /
60 // \ / \ / X
61 // \ / \ / / \
62 // Inp[1] *** \ / *** X *** *** Out[1]
63 // \ \ / / \ / \ /
64 // \ \ / / X X
65 // \ \ / / / \ / \
66 // Inp[2] *** \ \ / / *** X *** *** Out[2]
67 // \ \ X / / / \ \ /
68 // \ \ / \ / / / \ X
69 // \ X X / / \ / \
70 // Inp[3] *** \ / \ / \ / *** *** *** Out[3]
71 // \ X X X /
72 // \ / \ / \ / \ /
73 // X X X X
74 // / \ / \ / \ / \
75 // / X X X \
76 // Inp[4] *** / \ / \ / \ *** *** *** Out[4]
77 // / X X \ \ / \ /
78 // / / \ / \ \ \ / X
79 // / / X \ \ \ / / \
80 // Inp[5] *** / / \ \ *** X *** *** Out[5]
81 // / / \ \ \ / \ /
82 // / / \ \ X X
83 // / / \ \ / \ / \
84 // Inp[6] *** / \ *** X *** *** Out[6]
85 // / \ / \ \ /
86 // / \ / \ X
87 // / \ / \ / \
88 // Inp[7] *** *** *** *** Out[7]
89 //
90 //
91 // Reverse delta network is same as delta network, with the steps in
92 // the opposite order.
93 //
94 //
95 // Benes network is a forward delta network immediately followed by
96 // a reverse delta network.
97 
98 enum class ColorKind { None, Red, Black };
99 
100 // Graph coloring utility used to partition nodes into two groups:
101 // they will correspond to nodes routed to the upper and lower networks.
102 struct Coloring {
103  using Node = int;
104  using MapType = std::map<Node, ColorKind>;
105  static constexpr Node Ignore = Node(-1);
106 
107  Coloring(ArrayRef<Node> Ord) : Order(Ord) {
108  build();
109  if (!color())
110  Colors.clear();
111  }
112 
113  const MapType &colors() const {
114  return Colors;
115  }
116 
117  ColorKind other(ColorKind Color) {
118  if (Color == ColorKind::None)
119  return ColorKind::Red;
120  return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
121  }
122 
123  LLVM_DUMP_METHOD void dump() const;
124 
125 private:
126  ArrayRef<Node> Order;
127  MapType Colors;
128  std::set<Node> Needed;
129 
130  using NodeSet = std::set<Node>;
131  std::map<Node,NodeSet> Edges;
132 
133  Node conj(Node Pos) {
134  Node Num = Order.size();
135  return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
136  }
137 
138  ColorKind getColor(Node N) {
139  auto F = Colors.find(N);
140  return F != Colors.end() ? F->second : ColorKind::None;
141  }
142 
143  std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
144 
145  void build();
146  bool color();
147 };
148 } // namespace
149 
150 std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
151  auto Color = ColorKind::None;
152  for (Node N : Nodes) {
153  ColorKind ColorN = getColor(N);
154  if (ColorN == ColorKind::None)
155  continue;
156  if (Color == ColorKind::None)
157  Color = ColorN;
158  else if (Color != ColorKind::None && Color != ColorN)
159  return { false, ColorKind::None };
160  }
161  return { true, Color };
162 }
163 
164 void Coloring::build() {
165  // Add Order[P] and Order[conj(P)] to Edges.
166  for (unsigned P = 0; P != Order.size(); ++P) {
167  Node I = Order[P];
168  if (I != Ignore) {
169  Needed.insert(I);
170  Node PC = Order[conj(P)];
171  if (PC != Ignore && PC != I)
172  Edges[I].insert(PC);
173  }
174  }
175  // Add I and conj(I) to Edges.
176  for (unsigned I = 0; I != Order.size(); ++I) {
177  if (!Needed.count(I))
178  continue;
179  Node C = conj(I);
180  // This will create an entry in the edge table, even if I is not
181  // connected to any other node. This is necessary, because it still
182  // needs to be colored.
183  NodeSet &Is = Edges[I];
184  if (Needed.count(C))
185  Is.insert(C);
186  }
187 }
188 
189 bool Coloring::color() {
190  SetVector<Node> FirstQ;
191  auto Enqueue = [this,&FirstQ] (Node N) {
192  SetVector<Node> Q;
193  Q.insert(N);
194  for (unsigned I = 0; I != Q.size(); ++I) {
195  NodeSet &Ns = Edges[Q[I]];
196  Q.insert(Ns.begin(), Ns.end());
197  }
198  FirstQ.insert(Q.begin(), Q.end());
199  };
200  for (Node N : Needed)
201  Enqueue(N);
202 
203  for (Node N : FirstQ) {
204  if (Colors.count(N))
205  continue;
206  NodeSet &Ns = Edges[N];
207  auto P = getUniqueColor(Ns);
208  if (!P.first)
209  return false;
210  Colors[N] = other(P.second);
211  }
212 
213  // First, color nodes that don't have any dups.
214  for (auto E : Edges) {
215  Node N = E.first;
216  if (!Needed.count(conj(N)) || Colors.count(N))
217  continue;
218  auto P = getUniqueColor(E.second);
219  if (!P.first)
220  return false;
221  Colors[N] = other(P.second);
222  }
223 
224  // Now, nodes that are still uncolored. Since the graph can be modified
225  // in this step, create a work queue.
226  std::vector<Node> WorkQ;
227  for (auto E : Edges) {
228  Node N = E.first;
229  if (!Colors.count(N))
230  WorkQ.push_back(N);
231  }
232 
233  for (unsigned I = 0; I < WorkQ.size(); ++I) {
234  Node N = WorkQ[I];
235  NodeSet &Ns = Edges[N];
236  auto P = getUniqueColor(Ns);
237  if (P.first) {
238  Colors[N] = other(P.second);
239  continue;
240  }
241 
242  // Coloring failed. Split this node.
243  Node C = conj(N);
244  ColorKind ColorN = other(ColorKind::None);
245  ColorKind ColorC = other(ColorN);
246  NodeSet &Cs = Edges[C];
247  NodeSet CopyNs = Ns;
248  for (Node M : CopyNs) {
249  ColorKind ColorM = getColor(M);
250  if (ColorM == ColorC) {
251  // Connect M with C, disconnect M from N.
252  Cs.insert(M);
253  Edges[M].insert(C);
254  Ns.erase(M);
255  Edges[M].erase(N);
256  }
257  }
258  Colors[N] = ColorN;
259  Colors[C] = ColorC;
260  }
261 
262  // Explicitly assign "None" to all uncolored nodes.
263  for (unsigned I = 0; I != Order.size(); ++I)
264  if (Colors.count(I) == 0)
265  Colors[I] = ColorKind::None;
266 
267  return true;
268 }
269 
270 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
271 void Coloring::dump() const {
272  dbgs() << "{ Order: {";
273  for (unsigned I = 0; I != Order.size(); ++I) {
274  Node P = Order[I];
275  if (P != Ignore)
276  dbgs() << ' ' << P;
277  else
278  dbgs() << " -";
279  }
280  dbgs() << " }\n";
281  dbgs() << " Needed: {";
282  for (Node N : Needed)
283  dbgs() << ' ' << N;
284  dbgs() << " }\n";
285 
286  dbgs() << " Edges: {\n";
287  for (auto E : Edges) {
288  dbgs() << " " << E.first << " -> {";
289  for (auto N : E.second)
290  dbgs() << ' ' << N;
291  dbgs() << " }\n";
292  }
293  dbgs() << " }\n";
294 
295  auto ColorKindToName = [](ColorKind C) {
296  switch (C) {
297  case ColorKind::None:
298  return "None";
299  case ColorKind::Red:
300  return "Red";
301  case ColorKind::Black:
302  return "Black";
303  }
304  llvm_unreachable("all ColorKinds should be handled by the switch above");
305  };
306 
307  dbgs() << " Colors: {\n";
308  for (auto C : Colors)
309  dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
310  dbgs() << " }\n}\n";
311 }
312 #endif
313 
314 namespace {
315 // Base class of for reordering networks. They don't strictly need to be
316 // permutations, as outputs with repeated occurrences of an input element
317 // are allowed.
318 struct PermNetwork {
319  using Controls = std::vector<uint8_t>;
320  using ElemType = int;
321  static constexpr ElemType Ignore = ElemType(-1);
322 
323  enum : uint8_t {
324  None,
325  Pass,
326  Switch
327  };
328  enum : uint8_t {
329  Forward,
330  Reverse
331  };
332 
333  PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
334  Order.assign(Ord.data(), Ord.data()+Ord.size());
335  Log = 0;
336 
337  unsigned S = Order.size();
338  while (S >>= 1)
339  ++Log;
340 
341  Table.resize(Order.size());
342  for (RowType &Row : Table)
343  Row.resize(Mult*Log, None);
344  }
345 
346  void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
347  unsigned Size = Order.size();
348  V.resize(Size);
349  for (unsigned I = 0; I != Size; ++I) {
350  unsigned W = 0;
351  for (unsigned L = 0; L != Log; ++L) {
352  unsigned C = ctl(I, StartAt+L) == Switch;
353  if (Dir == Forward)
354  W |= C << (Log-1-L);
355  else
356  W |= C << L;
357  }
358  assert(isUInt<8>(W));
359  V[I] = uint8_t(W);
360  }
361  }
362 
363  uint8_t ctl(ElemType Pos, unsigned Step) const {
364  return Table[Pos][Step];
365  }
366  unsigned size() const {
367  return Order.size();
368  }
369  unsigned steps() const {
370  return Log;
371  }
372 
373 protected:
374  unsigned Log;
375  std::vector<ElemType> Order;
376  using RowType = std::vector<uint8_t>;
377  std::vector<RowType> Table;
378 };
379 
380 struct ForwardDeltaNetwork : public PermNetwork {
381  ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
382 
383  bool run(Controls &V) {
384  if (!route(Order.data(), Table.data(), size(), 0))
385  return false;
386  getControls(V, 0, Forward);
387  return true;
388  }
389 
390 private:
391  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
392 };
393 
394 struct ReverseDeltaNetwork : public PermNetwork {
395  ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
396 
397  bool run(Controls &V) {
398  if (!route(Order.data(), Table.data(), size(), 0))
399  return false;
400  getControls(V, 0, Reverse);
401  return true;
402  }
403 
404 private:
405  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
406 };
407 
408 struct BenesNetwork : public PermNetwork {
409  BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
410 
411  bool run(Controls &F, Controls &R) {
412  if (!route(Order.data(), Table.data(), size(), 0))
413  return false;
414 
415  getControls(F, 0, Forward);
416  getControls(R, Log, Reverse);
417  return true;
418  }
419 
420 private:
421  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
422 };
423 } // namespace
424 
425 bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
426  unsigned Step) {
427  bool UseUp = false, UseDown = false;
428  ElemType Num = Size;
429 
430  // Cannot use coloring here, because coloring is used to determine
431  // the "big" switch, i.e. the one that changes halves, and in a forward
432  // network, a color can be simultaneously routed to both halves in the
433  // step we're working on.
434  for (ElemType J = 0; J != Num; ++J) {
435  ElemType I = P[J];
436  // I is the position in the input,
437  // J is the position in the output.
438  if (I == Ignore)
439  continue;
440  uint8_t S;
441  if (I < Num/2)
442  S = (J < Num/2) ? Pass : Switch;
443  else
444  S = (J < Num/2) ? Switch : Pass;
445 
446  // U is the element in the table that needs to be updated.
447  ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
448  if (U < Num/2)
449  UseUp = true;
450  else
451  UseDown = true;
452  if (T[U][Step] != S && T[U][Step] != None)
453  return false;
454  T[U][Step] = S;
455  }
456 
457  for (ElemType J = 0; J != Num; ++J)
458  if (P[J] != Ignore && P[J] >= Num/2)
459  P[J] -= Num/2;
460 
461  if (Step+1 < Log) {
462  if (UseUp && !route(P, T, Size/2, Step+1))
463  return false;
464  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
465  return false;
466  }
467  return true;
468 }
469 
470 bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
471  unsigned Step) {
472  unsigned Pets = Log-1 - Step;
473  bool UseUp = false, UseDown = false;
474  ElemType Num = Size;
475 
476  // In this step half-switching occurs, so coloring can be used.
477  Coloring G({P,Size});
478  const Coloring::MapType &M = G.colors();
479  if (M.empty())
480  return false;
481 
482  ColorKind ColorUp = ColorKind::None;
483  for (ElemType J = 0; J != Num; ++J) {
484  ElemType I = P[J];
485  // I is the position in the input,
486  // J is the position in the output.
487  if (I == Ignore)
488  continue;
489  ColorKind C = M.at(I);
490  if (C == ColorKind::None)
491  continue;
492  // During "Step", inputs cannot switch halves, so if the "up" color
493  // is still unknown, make sure that it is selected in such a way that
494  // "I" will stay in the same half.
495  bool InpUp = I < Num/2;
496  if (ColorUp == ColorKind::None)
497  ColorUp = InpUp ? C : G.other(C);
498  if ((C == ColorUp) != InpUp) {
499  // If I should go to a different half than where is it now, give up.
500  return false;
501  }
502 
503  uint8_t S;
504  if (InpUp) {
505  S = (J < Num/2) ? Pass : Switch;
506  UseUp = true;
507  } else {
508  S = (J < Num/2) ? Switch : Pass;
509  UseDown = true;
510  }
511  T[J][Pets] = S;
512  }
513 
514  // Reorder the working permutation according to the computed switch table
515  // for the last step (i.e. Pets).
516  for (ElemType J = 0, E = Size / 2; J != E; ++J) {
517  ElemType PJ = P[J]; // Current values of P[J]
518  ElemType PC = P[J+Size/2]; // and P[conj(J)]
519  ElemType QJ = PJ; // New values of P[J]
520  ElemType QC = PC; // and P[conj(J)]
521  if (T[J][Pets] == Switch)
522  QC = PJ;
523  if (T[J+Size/2][Pets] == Switch)
524  QJ = PC;
525  P[J] = QJ;
526  P[J+Size/2] = QC;
527  }
528 
529  for (ElemType J = 0; J != Num; ++J)
530  if (P[J] != Ignore && P[J] >= Num/2)
531  P[J] -= Num/2;
532 
533  if (Step+1 < Log) {
534  if (UseUp && !route(P, T, Size/2, Step+1))
535  return false;
536  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
537  return false;
538  }
539  return true;
540 }
541 
542 bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
543  unsigned Step) {
544  Coloring G({P,Size});
545  const Coloring::MapType &M = G.colors();
546  if (M.empty())
547  return false;
548  ElemType Num = Size;
549 
550  unsigned Pets = 2*Log-1 - Step;
551  bool UseUp = false, UseDown = false;
552 
553  // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
554  // result in different controls. Let's pick the one where the first
555  // control will be "Pass".
556  ColorKind ColorUp = ColorKind::None;
557  for (ElemType J = 0; J != Num; ++J) {
558  ElemType I = P[J];
559  if (I == Ignore)
560  continue;
561  ColorKind C = M.at(I);
562  if (C == ColorKind::None)
563  continue;
564  if (ColorUp == ColorKind::None) {
565  ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
566  }
567  unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
568  if (C == ColorUp) {
569  if (I < Num/2)
570  T[I][Step] = Pass;
571  else
572  T[CI][Step] = Switch;
573  T[J][Pets] = (J < Num/2) ? Pass : Switch;
574  UseUp = true;
575  } else { // Down
576  if (I < Num/2)
577  T[CI][Step] = Switch;
578  else
579  T[I][Step] = Pass;
580  T[J][Pets] = (J < Num/2) ? Switch : Pass;
581  UseDown = true;
582  }
583  }
584 
585  // Reorder the working permutation according to the computed switch table
586  // for the last step (i.e. Pets).
587  for (ElemType J = 0; J != Num/2; ++J) {
588  ElemType PJ = P[J]; // Current values of P[J]
589  ElemType PC = P[J+Num/2]; // and P[conj(J)]
590  ElemType QJ = PJ; // New values of P[J]
591  ElemType QC = PC; // and P[conj(J)]
592  if (T[J][Pets] == Switch)
593  QC = PJ;
594  if (T[J+Num/2][Pets] == Switch)
595  QJ = PC;
596  P[J] = QJ;
597  P[J+Num/2] = QC;
598  }
599 
600  for (ElemType J = 0; J != Num; ++J)
601  if (P[J] != Ignore && P[J] >= Num/2)
602  P[J] -= Num/2;
603 
604  if (Step+1 < Log) {
605  if (UseUp && !route(P, T, Size/2, Step+1))
606  return false;
607  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
608  return false;
609  }
610  return true;
611 }
612 
613 // --------------------------------------------------------------------
614 // Support for building selection results (output instructions that are
615 // parts of the final selection).
616 
617 namespace {
618 struct OpRef {
619  OpRef(SDValue V) : OpV(V) {}
620  bool isValue() const { return OpV.getNode() != nullptr; }
621  bool isValid() const { return isValue() || !(OpN & Invalid); }
622  static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
623  static OpRef fail() { return OpRef(Invalid); }
624 
625  static OpRef lo(const OpRef &R) {
626  assert(!R.isValue());
627  return OpRef(R.OpN & (Undef | Index | LoHalf));
628  }
629  static OpRef hi(const OpRef &R) {
630  assert(!R.isValue());
631  return OpRef(R.OpN & (Undef | Index | HiHalf));
632  }
633  static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
634 
635  // Direct value.
636  SDValue OpV = SDValue();
637 
638  // Reference to the operand of the input node:
639  // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
640  // operand index:
641  // If bit 30 is set, it's the high half of the operand.
642  // If bit 29 is set, it's the low half of the operand.
643  unsigned OpN = 0;
644 
645  enum : unsigned {
646  Invalid = 0x10000000,
647  LoHalf = 0x20000000,
648  HiHalf = 0x40000000,
649  Whole = LoHalf | HiHalf,
650  Undef = 0x80000000,
651  Index = 0x0FFFFFFF, // Mask of the index value.
652  IndexBits = 28,
653  };
654 
656  void print(raw_ostream &OS, const SelectionDAG &G) const;
657 
658 private:
659  OpRef(unsigned N) : OpN(N) {}
660 };
661 
662 struct NodeTemplate {
663  NodeTemplate() = default;
664  unsigned Opc = 0;
665  MVT Ty = MVT::Other;
666  std::vector<OpRef> Ops;
667 
668  LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
669 };
670 
671 struct ResultStack {
672  ResultStack(SDNode *Inp)
673  : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
674  SDNode *InpNode;
675  MVT InpTy;
676  unsigned push(const NodeTemplate &Res) {
677  List.push_back(Res);
678  return List.size()-1;
679  }
680  unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
681  NodeTemplate Res;
682  Res.Opc = Opc;
683  Res.Ty = Ty;
684  Res.Ops = Ops;
685  return push(Res);
686  }
687  bool empty() const { return List.empty(); }
688  unsigned size() const { return List.size(); }
689  unsigned top() const { return size()-1; }
690  const NodeTemplate &operator[](unsigned I) const { return List[I]; }
691  unsigned reset(unsigned NewTop) {
692  List.resize(NewTop+1);
693  return NewTop;
694  }
695 
696  using BaseType = std::vector<NodeTemplate>;
697  BaseType::iterator begin() { return List.begin(); }
698  BaseType::iterator end() { return List.end(); }
699  BaseType::const_iterator begin() const { return List.begin(); }
700  BaseType::const_iterator end() const { return List.end(); }
701 
702  BaseType List;
703 
705  void print(raw_ostream &OS, const SelectionDAG &G) const;
706 };
707 } // namespace
708 
709 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
710 void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
711  if (isValue()) {
712  OpV.getNode()->print(OS, &G);
713  return;
714  }
715  if (OpN & Invalid) {
716  OS << "invalid";
717  return;
718  }
719  if (OpN & Undef) {
720  OS << "undef";
721  return;
722  }
723  if ((OpN & Whole) != Whole) {
724  assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
725  if (OpN & LoHalf)
726  OS << "lo ";
727  else
728  OS << "hi ";
729  }
730  OS << '#' << SignExtend32(OpN & Index, IndexBits);
731 }
732 
733 void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
734  const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
735  OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
736  << TII.getName(Opc);
737  bool Comma = false;
738  for (const auto &R : Ops) {
739  if (Comma)
740  OS << ',';
741  Comma = true;
742  OS << ' ';
743  R.print(OS, G);
744  }
745 }
746 
747 void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
748  OS << "Input node:\n";
749 #ifndef NDEBUG
750  InpNode->dumpr(&G);
751 #endif
752  OS << "Result templates:\n";
753  for (unsigned I = 0, E = List.size(); I != E; ++I) {
754  OS << '[' << I << "] ";
755  List[I].print(OS, G);
756  OS << '\n';
757  }
758 }
759 #endif
760 
761 namespace {
762 struct ShuffleMask {
763  ShuffleMask(ArrayRef<int> M) : Mask(M) {
764  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
765  int M = Mask[I];
766  if (M == -1)
767  continue;
768  MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
769  MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
770  }
771  }
772 
774  int MinSrc = -1, MaxSrc = -1;
775 
776  ShuffleMask lo() const {
777  size_t H = Mask.size()/2;
778  return ShuffleMask(Mask.take_front(H));
779  }
780  ShuffleMask hi() const {
781  size_t H = Mask.size()/2;
782  return ShuffleMask(Mask.take_back(H));
783  }
784 
785  void print(raw_ostream &OS) const {
786  OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
787  for (int M : Mask)
788  OS << ' ' << M;
789  OS << " }";
790  }
791 };
792 
794 raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
795  SM.print(OS);
796  return OS;
797 }
798 } // namespace
799 
800 // --------------------------------------------------------------------
801 // The HvxSelector class.
802 
804  return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
805 }
807  return static_cast<const HexagonSubtarget&>(G.getSubtarget());
808 }
809 
810 namespace llvm {
811  struct HvxSelector {
816  const unsigned HwLen;
817 
820  HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
821 
822  MVT getSingleVT(MVT ElemTy) const {
823  assert(ElemTy != MVT::i1 && "Use getBoolVT for predicates");
824  unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8);
825  return MVT::getVectorVT(ElemTy, NumElems);
826  }
827 
828  MVT getPairVT(MVT ElemTy) const {
829  assert(ElemTy != MVT::i1); // Suspicious: there are no predicate pairs.
830  unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8);
831  return MVT::getVectorVT(ElemTy, NumElems);
832  }
833 
834  MVT getBoolVT() const {
835  // Return HwLen x i1.
836  return MVT::getVectorVT(MVT::i1, HwLen);
837  }
838 
839  void selectShuffle(SDNode *N);
840  void selectRor(SDNode *N);
841  void selectVAlign(SDNode *N);
842 
843  private:
844  void select(SDNode *ISelN);
845  void materialize(const ResultStack &Results);
846 
847  SDValue getConst32(int Val, const SDLoc &dl);
848  SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
849 
850  enum : unsigned {
851  None,
852  PackMux,
853  };
854  OpRef concats(OpRef Va, OpRef Vb, ResultStack &Results);
855  OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
856  MutableArrayRef<int> NewMask, unsigned Options = None);
857  OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
858  MutableArrayRef<int> NewMask);
859  OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
860  ResultStack &Results);
861  OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
862  ResultStack &Results);
863 
864  OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
865  OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
866  OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
867  OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
868 
869  OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
870  OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
871  OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
872  OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
873 
874  bool selectVectorConstants(SDNode *N);
875  bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
876  SDValue Va, SDValue Vb, SDNode *N);
877 
878  };
879 }
880 
882  MutableArrayRef<int> MaskR) {
883  unsigned VecLen = Mask.size();
884  assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
885  for (unsigned I = 0; I != VecLen; ++I) {
886  int M = Mask[I];
887  if (M < 0) {
888  MaskL[I] = MaskR[I] = -1;
889  } else if (unsigned(M) < VecLen) {
890  MaskL[I] = M;
891  MaskR[I] = -1;
892  } else {
893  MaskL[I] = -1;
894  MaskR[I] = M-VecLen;
895  }
896  }
897 }
898 
899 static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
900  unsigned MaxLen) {
901  assert(A.size() > 0 && A.size() >= MaxLen);
902  int F = A[0];
903  int E = F;
904  for (unsigned I = 1; I != MaxLen; ++I) {
905  if (A[I] - E != Inc)
906  return { F, I };
907  E = A[I];
908  }
909  return { F, MaxLen };
910 }
911 
912 static bool isUndef(ArrayRef<int> Mask) {
913  for (int Idx : Mask)
914  if (Idx != -1)
915  return false;
916  return true;
917 }
918 
920  for (int I = 0, E = Mask.size(); I != E; ++I) {
921  int M = Mask[I];
922  if (M >= 0 && M != I)
923  return false;
924  }
925  return true;
926 }
927 
929  unsigned SegLen) {
930  assert(isPowerOf2_32(SegLen));
931  SmallVector<unsigned, 4> SegList;
932  if (SM.MaxSrc == -1)
933  return SegList;
934 
935  unsigned Shift = Log2_32(SegLen);
936  BitVector Segs(alignTo(SM.MaxSrc + 1, SegLen) >> Shift);
937 
938  for (int I = 0, E = SM.Mask.size(); I != E; ++I) {
939  int M = SM.Mask[I];
940  if (M >= 0)
941  Segs.set(M >> Shift);
942  }
943 
944  for (unsigned B : Segs.set_bits())
945  SegList.push_back(B);
946  return SegList;
947 }
948 
950  unsigned SegLen) {
951  // Calculate the layout of the output segments in terms of the input
952  // segments.
953  // For example [1,3,1,0] means that the output consists of 4 output
954  // segments, where the first output segment has only elements of the
955  // input segment at index 1. The next output segment only has elements
956  // of the input segment 3, etc.
957  // If an output segment only has undef elements, the value will be ~0u.
958  // If an output segment has elements from more than one input segment,
959  // the corresponding value will be ~1u.
960  unsigned MaskLen = SM.Mask.size();
961  assert(MaskLen % SegLen == 0);
962  SmallVector<unsigned, 4> Map(MaskLen / SegLen);
963 
964  for (int S = 0, E = Map.size(); S != E; ++S) {
965  unsigned Idx = ~0u;
966  for (int I = 0; I != static_cast<int>(SegLen); ++I) {
967  int M = SM.Mask[S*SegLen + I];
968  if (M < 0)
969  continue;
970  unsigned G = M / SegLen; // Input segment of this element.
971  if (Idx == ~0u) {
972  Idx = G;
973  } else if (Idx != G) {
974  Idx = ~1u;
975  break;
976  }
977  }
978  Map[S] = Idx;
979  }
980 
981  return Map;
982 }
983 
985  unsigned SegLen, MutableArrayRef<int> PackedMask) {
987  for (int I = OutSegMap.size() - 1; I >= 0; --I) {
988  unsigned S = OutSegMap[I];
989  assert(S != ~0u && "Unexpected undef");
990  assert(S != ~1u && "Unexpected multi");
991  if (InvMap.size() <= S)
992  InvMap.resize(S+1);
993  InvMap[S] = I;
994  }
995 
996  unsigned Shift = Log2_32(SegLen);
997  for (int I = 0, E = Mask.size(); I != E; ++I) {
998  int M = Mask[I];
999  if (M >= 0) {
1000  int OutIdx = InvMap[M >> Shift];
1001  M = (M & (SegLen-1)) + SegLen*OutIdx;
1002  }
1003  PackedMask[I] = M;
1004  }
1005 }
1006 
1008  // Check by adding all numbers only works if there is no overflow.
1009  assert(Mask.size() < 0x00007FFF && "Sanity failure");
1010  int Sum = 0;
1011  for (int Idx : Mask) {
1012  if (Idx == -1)
1013  return false;
1014  Sum += Idx;
1015  }
1016  int N = Mask.size();
1017  return 2*Sum == N*(N-1);
1018 }
1019 
1020 bool HvxSelector::selectVectorConstants(SDNode *N) {
1021  // Constant vectors are generated as loads from constant pools or as
1022  // splats of a constant value. Since they are generated during the
1023  // selection process, the main selection algorithm is not aware of them.
1024  // Select them directly here.
1025  SmallVector<SDNode*,4> Nodes;
1026  SetVector<SDNode*> WorkQ;
1027 
1028  // The DAG can change (due to CSE) during selection, so cache all the
1029  // unselected nodes first to avoid traversing a mutating DAG.
1030  WorkQ.insert(N);
1031  for (unsigned i = 0; i != WorkQ.size(); ++i) {
1032  SDNode *W = WorkQ[i];
1033  if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
1034  Nodes.push_back(W);
1035  for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
1036  WorkQ.insert(W->getOperand(j).getNode());
1037  }
1038 
1039  for (SDNode *L : Nodes)
1040  select(L);
1041 
1042  return !Nodes.empty();
1043 }
1044 
1045 void HvxSelector::materialize(const ResultStack &Results) {
1046  DEBUG_WITH_TYPE("isel", {
1047  dbgs() << "Materializing\n";
1048  Results.print(dbgs(), DAG);
1049  });
1050  if (Results.empty())
1051  return;
1052  const SDLoc &dl(Results.InpNode);
1053  std::vector<SDValue> Output;
1054 
1055  for (unsigned I = 0, E = Results.size(); I != E; ++I) {
1056  const NodeTemplate &Node = Results[I];
1057  std::vector<SDValue> Ops;
1058  for (const OpRef &R : Node.Ops) {
1059  assert(R.isValid());
1060  if (R.isValue()) {
1061  Ops.push_back(R.OpV);
1062  continue;
1063  }
1064  if (R.OpN & OpRef::Undef) {
1066  Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
1067  continue;
1068  }
1069  // R is an index of a result.
1070  unsigned Part = R.OpN & OpRef::Whole;
1071  int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
1072  if (Idx < 0)
1073  Idx += I;
1074  assert(Idx >= 0 && unsigned(Idx) < Output.size());
1075  SDValue Op = Output[Idx];
1076  MVT OpTy = Op.getValueType().getSimpleVT();
1077  if (Part != OpRef::Whole) {
1078  assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
1079  MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
1080  OpTy.getVectorNumElements()/2);
1081  unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
1082  : Hexagon::vsub_hi;
1083  Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
1084  }
1085  Ops.push_back(Op);
1086  } // for (Node : Results)
1087 
1088  assert(Node.Ty != MVT::Other);
1089  SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1090  ? Ops.front().getNode()
1091  : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1092  Output.push_back(SDValue(ResN, 0));
1093  }
1094 
1095  SDNode *OutN = Output.back().getNode();
1096  SDNode *InpN = Results.InpNode;
1097  DEBUG_WITH_TYPE("isel", {
1098  dbgs() << "Generated node:\n";
1099  OutN->dumpr(&DAG);
1100  });
1101 
1102  ISel.ReplaceNode(InpN, OutN);
1103  selectVectorConstants(OutN);
1104  DAG.RemoveDeadNodes();
1105 }
1106 
1107 OpRef HvxSelector::concats(OpRef Lo, OpRef Hi, ResultStack &Results) {
1108  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1109  const SDLoc &dl(Results.InpNode);
1110  Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1111  getConst32(Hexagon::HvxWRRegClassID, dl),
1112  Lo, getConst32(Hexagon::vsub_lo, dl),
1113  Hi, getConst32(Hexagon::vsub_hi, dl),
1114  });
1115  return OpRef::res(Results.top());
1116 }
1117 
1118 // Va, Vb are single vectors. If SM only uses two vector halves from Va/Vb,
1119 // pack these halves into a single vector, and remap SM into NewMask to use
1120 // the new vector instead.
1121 OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1122  ResultStack &Results, MutableArrayRef<int> NewMask,
1123  unsigned Options) {
1124  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1125  if (!Va.isValid() || !Vb.isValid())
1126  return OpRef::fail();
1127 
1128  MVT Ty = getSingleVT(MVT::i8);
1130  OpRef Inp[2] = {Va, Vb};
1131  unsigned VecLen = SM.Mask.size();
1132 
1133  auto valign = [this](OpRef Lo, OpRef Hi, unsigned Amt, MVT Ty,
1134  ResultStack &Results) {
1135  if (Amt == 0)
1136  return Lo;
1137  const SDLoc &dl(Results.InpNode);
1138  if (isUInt<3>(Amt) || isUInt<3>(HwLen - Amt)) {
1139  bool IsRight = isUInt<3>(Amt); // Right align.
1140  SDValue S = getConst32(IsRight ? Amt : HwLen - Amt, dl);
1141  unsigned Opc = IsRight ? Hexagon::V6_valignbi : Hexagon::V6_vlalignbi;
1142  Results.push(Opc, Ty, {Hi, Lo, S});
1143  return OpRef::res(Results.top());
1144  }
1145  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(Amt, dl)});
1146  OpRef A = OpRef::res(Results.top());
1147  Results.push(Hexagon::V6_valignb, Ty, {Hi, Lo, A});
1148  return OpRef::res(Results.top());
1149  };
1150 
1151  // Segment is a vector half.
1152  unsigned SegLen = HwLen / 2;
1153 
1154  // Check if we can shuffle vector halves around to get the used elements
1155  // into a single vector.
1156  SmallVector<int,128> MaskH(SM.Mask.begin(), SM.Mask.end());
1157  SmallVector<unsigned, 4> SegList = getInputSegmentList(SM.Mask, SegLen);
1158  unsigned SegCount = SegList.size();
1159  SmallVector<unsigned, 4> SegMap = getOutputSegmentMap(SM.Mask, SegLen);
1160 
1161  if (SegList.empty())
1162  return OpRef::undef(Ty);
1163 
1164  // NOTE:
1165  // In the following part of the function, where the segments are rearranged,
1166  // the shuffle mask SM can be of any length that is a multiple of a vector
1167  // (i.e. a multiple of 2*SegLen), and non-zero.
1168  // The output segment map is computed, and it may have any even number of
1169  // entries, but the rearrangement of input segments will be done based only
1170  // on the first two (non-undef) entries in the segment map.
1171  // For example, if the output map is 3, 1, 1, 3 (it can have at most two
1172  // distinct entries!), the segments 1 and 3 of Va/Vb will be packaged into
1173  // a single vector V = 3:1. The output mask will then be updated to use
1174  // seg(0,V), seg(1,V), seg(1,V), seg(0,V).
1175  //
1176  // Picking the segments based on the output map is an optimization. For
1177  // correctness it is only necessary that Seg0 and Seg1 are the two input
1178  // segments that are used in the output.
1179 
1180  unsigned Seg0 = ~0u, Seg1 = ~0u;
1181  for (int I = 0, E = SegMap.size(); I != E; ++I) {
1182  unsigned X = SegMap[I];
1183  if (X == ~0u)
1184  continue;
1185  if (Seg0 == ~0u)
1186  Seg0 = X;
1187  else if (Seg1 != ~0u)
1188  break;
1189  if (X == ~1u || X != Seg0)
1190  Seg1 = X;
1191  }
1192 
1193  if (SegCount == 1) {
1194  unsigned SrcOp = SegList[0] / 2;
1195  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1196  int M = SM.Mask[I];
1197  if (M >= 0) {
1198  M -= SrcOp * HwLen;
1199  assert(M >= 0);
1200  }
1201  NewMask[I] = M;
1202  }
1203  return Inp[SrcOp];
1204  }
1205 
1206  if (SegCount == 2) {
1207  // Seg0 should not be undef here: this would imply a SegList
1208  // with <= 1 elements, which was checked earlier.
1209  assert(Seg0 != ~0u);
1210 
1211  // If Seg0 or Seg1 are "multi-defined", pick them from the input
1212  // segment list instead.
1213  if (Seg0 == ~1u || Seg1 == ~1u) {
1214  if (Seg0 == Seg1) {
1215  Seg0 = SegList[0];
1216  Seg1 = SegList[1];
1217  } else if (Seg0 == ~1u) {
1218  Seg0 = SegList[0] != Seg1 ? SegList[0] : SegList[1];
1219  } else {
1220  assert(Seg1 == ~1u); // Sanity
1221  Seg1 = SegList[0] != Seg0 ? SegList[0] : SegList[1];
1222  }
1223  }
1224  assert(Seg0 != ~1u && Seg1 != ~1u);
1225 
1226  assert(Seg0 != Seg1 && "Expecting different segments");
1227  const SDLoc &dl(Results.InpNode);
1228  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(SegLen, dl)});
1229  OpRef HL = OpRef::res(Results.top());
1230 
1231  // Va = AB, Vb = CD
1232 
1233  if (Seg0 / 2 == Seg1 / 2) {
1234  // Same input vector.
1235  Va = Inp[Seg0 / 2];
1236  if (Seg0 > Seg1) {
1237  // Swap halves.
1238  Results.push(Hexagon::V6_vror, Ty, {Inp[Seg0 / 2], HL});
1239  Va = OpRef::res(Results.top());
1240  }
1241  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1242  } else if (Seg0 % 2 == Seg1 % 2) {
1243  // Picking AC, BD, CA, or DB.
1244  // vshuff(CD,AB,HL) -> BD:AC
1245  // vshuff(AB,CD,HL) -> DB:CA
1246  auto Vs = (Seg0 == 0 || Seg0 == 1) ? std::make_pair(Vb, Va) // AC or BD
1247  : std::make_pair(Va, Vb); // CA or DB
1248  Results.push(Hexagon::V6_vshuffvdd, PairTy, {Vs.first, Vs.second, HL});
1249  OpRef P = OpRef::res(Results.top());
1250  Va = (Seg0 == 0 || Seg0 == 2) ? OpRef::lo(P) : OpRef::hi(P);
1251  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1252  } else {
1253  // Picking AD, BC, CB, or DA.
1254  if ((Seg0 == 0 && Seg1 == 3) || (Seg0 == 2 && Seg1 == 1)) {
1255  // AD or BC: this can be done using vmux.
1256  // Q = V6_pred_scalar2 SegLen
1257  // V = V6_vmux Q, (Va, Vb) or (Vb, Va)
1258  Results.push(Hexagon::V6_pred_scalar2, getBoolVT(), {HL});
1259  OpRef Qt = OpRef::res(Results.top());
1260  auto Vs = (Seg0 == 0) ? std::make_pair(Va, Vb) // AD
1261  : std::make_pair(Vb, Va); // CB
1262  Results.push(Hexagon::V6_vmux, Ty, {Qt, Vs.first, Vs.second});
1263  Va = OpRef::res(Results.top());
1264  packSegmentMask(SM.Mask, {Seg0, Seg1}, SegLen, MaskH);
1265  } else {
1266  // BC or DA: this could be done via valign by SegLen.
1267  // Do nothing here, because valign (if possible) will be generated
1268  // later on (make sure the Seg0 values are as expected, for sanity).
1269  assert(Seg0 == 1 || Seg0 == 3);
1270  }
1271  }
1272  }
1273 
1274  // Check if the arguments can be packed by valign(Va,Vb) or valign(Vb,Va).
1275 
1276  ShuffleMask SMH(MaskH);
1277  assert(SMH.Mask.size() == VecLen);
1278  SmallVector<int,128> MaskA(SMH.Mask.begin(), SMH.Mask.end());
1279 
1280  if (SMH.MaxSrc - SMH.MinSrc >= static_cast<int>(HwLen)) {
1281  // valign(Lo=Va,Hi=Vb) won't work. Try swapping Va/Vb.
1282  SmallVector<int,128> Swapped(SMH.Mask.begin(), SMH.Mask.end());
1284  ShuffleMask SW(Swapped);
1285  if (SW.MaxSrc - SW.MinSrc < static_cast<int>(HwLen)) {
1286  MaskA.assign(SW.Mask.begin(), SW.Mask.end());
1287  std::swap(Va, Vb);
1288  }
1289  }
1290  ShuffleMask SMA(MaskA);
1291  assert(SMA.Mask.size() == VecLen);
1292 
1293  if (SMA.MaxSrc - SMA.MinSrc < static_cast<int>(HwLen)) {
1294  int ShiftR = SMA.MinSrc;
1295  if (ShiftR >= static_cast<int>(HwLen)) {
1296  Va = Vb;
1297  Vb = OpRef::undef(Ty);
1298  ShiftR -= HwLen;
1299  }
1300  OpRef RetVal = valign(Va, Vb, ShiftR, Ty, Results);
1301 
1302  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1303  int M = SMA.Mask[I];
1304  if (M != -1)
1305  M -= SMA.MinSrc;
1306  NewMask[I] = M;
1307  }
1308  return RetVal;
1309  }
1310 
1311  // By here, packing by segment (half-vector) shuffling, and vector alignment
1312  // failed. Try vmux.
1313  // Note: since this is using the original mask, Va and Vb must not have been
1314  // modified.
1315 
1316  if (Options & PackMux) {
1317  // If elements picked from Va and Vb have all different (source) indexes
1318  // (relative to the start of the argument), do a mux, and update the mask.
1319  BitVector Picked(HwLen);
1320  SmallVector<uint8_t,128> MuxBytes(HwLen);
1321  bool CanMux = true;
1322  for (int I = 0; I != static_cast<int>(VecLen); ++I) {
1323  int M = SM.Mask[I];
1324  if (M == -1)
1325  continue;
1326  if (M >= static_cast<int>(HwLen))
1327  M -= HwLen;
1328  else
1329  MuxBytes[M] = 0xFF;
1330  if (Picked[M]) {
1331  CanMux = false;
1332  break;
1333  }
1334  NewMask[I] = M;
1335  }
1336  if (CanMux)
1337  return vmuxs(MuxBytes, Va, Vb, Results);
1338  }
1339  return OpRef::fail();
1340 }
1341 
1342 // Va, Vb are vector pairs. If SM only uses two single vectors from Va/Vb,
1343 // pack these vectors into a pair, and remap SM into NewMask to use the
1344 // new pair instead.
1345 OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1346  ResultStack &Results, MutableArrayRef<int> NewMask) {
1347  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1349  if (SegList.empty())
1350  return OpRef::undef(getPairVT(MVT::i8));
1351 
1352  // If more than two halves are used, bail.
1353  // TODO: be more aggressive here?
1354  unsigned SegCount = SegList.size();
1355  if (SegCount > 2)
1356  return OpRef::fail();
1357 
1358  MVT HalfTy = getSingleVT(MVT::i8);
1359 
1360  OpRef Inp[2] = { Va, Vb };
1361  OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1362 
1363  // Really make sure we have at most 2 vectors used in the mask.
1364  assert(SegCount <= 2);
1365 
1366  for (int I = 0, E = SegList.size(); I != E; ++I) {
1367  unsigned S = SegList[I];
1368  OpRef Op = Inp[S / 2];
1369  Out[I] = (S & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1370  }
1371 
1372  // NOTE: Using SegList as the packing map here (not SegMap). This works,
1373  // because we're not concerned here about the order of the segments (i.e.
1374  // single vectors) in the output pair. Changing the order of vectors is
1375  // free (as opposed to changing the order of vector halves as in packs),
1376  // and so there is no extra cost added in case the order needs to be
1377  // changed later.
1378  packSegmentMask(SM.Mask, SegList, HwLen, NewMask);
1379  return concats(Out[0], Out[1], Results);
1380 }
1381 
1382 OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1383  ResultStack &Results) {
1384  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1385  MVT ByteTy = getSingleVT(MVT::i8);
1386  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1387  const SDLoc &dl(Results.InpNode);
1388  SDValue B = getVectorConstant(Bytes, dl);
1389  Results.push(Hexagon::V6_vd0, ByteTy, {});
1390  Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1391  Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1392  return OpRef::res(Results.top());
1393 }
1394 
1395 OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1396  ResultStack &Results) {
1397  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1398  size_t S = Bytes.size() / 2;
1399  OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1400  OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1401  return concats(L, H, Results);
1402 }
1403 
1404 OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1405  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1406  unsigned VecLen = SM.Mask.size();
1407  assert(HwLen == VecLen);
1408  (void)VecLen;
1409  assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1410 
1411  if (isIdentity(SM.Mask))
1412  return Va;
1413  if (isUndef(SM.Mask))
1414  return OpRef::undef(getSingleVT(MVT::i8));
1415 
1416  unsigned HalfLen = HwLen / 2;
1417  assert(isPowerOf2_32(HalfLen)); // Sanity.
1418 
1419  // Handle special case where the output is the same half of the input
1420  // repeated twice, i.e. if Va = AB, then handle the output of AA or BB.
1421  std::pair<int, unsigned> Strip1 = findStrip(SM.Mask, 1, HalfLen);
1422  if ((Strip1.first & ~HalfLen) == 0 && Strip1.second == HalfLen) {
1423  std::pair<int, unsigned> Strip2 =
1424  findStrip(SM.Mask.drop_front(HalfLen), 1, HalfLen);
1425  if (Strip1 == Strip2) {
1426  const SDLoc &dl(Results.InpNode);
1427  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(HalfLen, dl)});
1428  Results.push(Hexagon::V6_vshuffvdd, getPairVT(MVT::i8),
1429  {Va, Va, OpRef::res(Results.top())});
1430  OpRef S = OpRef::res(Results.top());
1431  return (Strip1.first == 0) ? OpRef::lo(S) : OpRef::hi(S);
1432  }
1433  }
1434 
1435  OpRef P = perfect(SM, Va, Results);
1436  if (P.isValid())
1437  return P;
1438  return butterfly(SM, Va, Results);
1439 }
1440 
1441 OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1442  ResultStack &Results) {
1443  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1444  if (isUndef(SM.Mask))
1445  return OpRef::undef(getSingleVT(MVT::i8));
1446 
1447  OpRef C = contracting(SM, Va, Vb, Results);
1448  if (C.isValid())
1449  return C;
1450 
1451  int VecLen = SM.Mask.size();
1452  SmallVector<int,128> PackedMask(VecLen);
1453  OpRef P = packs(SM, Va, Vb, Results, PackedMask);
1454  if (P.isValid())
1455  return shuffs1(ShuffleMask(PackedMask), P, Results);
1456 
1457  // TODO: Before we split the mask, try perfect shuffle on concatenated
1458  // operands. This won't work now, because the perfect code does not
1459  // tolerate undefs in the mask.
1460 
1461  SmallVector<int,128> MaskL(VecLen), MaskR(VecLen);
1462  splitMask(SM.Mask, MaskL, MaskR);
1463 
1464  OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1465  OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1466  if (!L.isValid() || !R.isValid())
1467  return OpRef::fail();
1468 
1469  SmallVector<uint8_t,128> Bytes(VecLen);
1470  for (int I = 0; I != VecLen; ++I) {
1471  if (MaskL[I] != -1)
1472  Bytes[I] = 0xFF;
1473  }
1474  return vmuxs(Bytes, L, R, Results);
1475 }
1476 
1477 OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1478  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1479  int VecLen = SM.Mask.size();
1480 
1481  if (isIdentity(SM.Mask))
1482  return Va;
1483  if (isUndef(SM.Mask))
1484  return OpRef::undef(getPairVT(MVT::i8));
1485 
1486  SmallVector<int,128> PackedMask(VecLen);
1487  OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1488  if (P.isValid()) {
1489  ShuffleMask PM(PackedMask);
1490  OpRef E = expanding(PM, P, Results);
1491  if (E.isValid())
1492  return E;
1493 
1494  OpRef L = shuffs1(PM.lo(), P, Results);
1495  OpRef H = shuffs1(PM.hi(), P, Results);
1496  if (L.isValid() && H.isValid())
1497  return concats(L, H, Results);
1498  }
1499 
1500  OpRef R = perfect(SM, Va, Results);
1501  if (R.isValid())
1502  return R;
1503  // TODO commute the mask and try the opposite order of the halves.
1504 
1505  OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1506  OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1507  if (L.isValid() && H.isValid())
1508  return concats(L, H, Results);
1509 
1510  return OpRef::fail();
1511 }
1512 
1513 OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1514  ResultStack &Results) {
1515  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1516  if (isUndef(SM.Mask))
1517  return OpRef::undef(getPairVT(MVT::i8));
1518 
1519  int VecLen = SM.Mask.size();
1520  SmallVector<int,256> PackedMask(VecLen);
1521  OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1522  if (P.isValid())
1523  return shuffp1(ShuffleMask(PackedMask), P, Results);
1524 
1525  SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1526  splitMask(SM.Mask, MaskL, MaskR);
1527 
1528  OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1529  OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1530  if (!L.isValid() || !R.isValid())
1531  return OpRef::fail();
1532 
1533  // Mux the results.
1534  SmallVector<uint8_t,256> Bytes(VecLen);
1535  for (int I = 0; I != VecLen; ++I) {
1536  if (MaskL[I] != -1)
1537  Bytes[I] = 0xFF;
1538  }
1539  return vmuxp(Bytes, L, R, Results);
1540 }
1541 
1542 namespace {
1543  struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1544  template <typename T>
1545  Deleter(SelectionDAG &D, T &C)
1546  : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1547  C.erase(N);
1548  }) {}
1549  };
1550 
1551  template <typename T>
1552  struct NullifyingVector : public T {
1554  NullifyingVector(T &&V) : T(V) {
1555  for (unsigned i = 0, e = T::size(); i != e; ++i) {
1556  SDNode *&N = T::operator[](i);
1557  Refs[N] = &N;
1558  }
1559  }
1560  void erase(SDNode *N) {
1561  auto F = Refs.find(N);
1562  if (F != Refs.end())
1563  *F->second = nullptr;
1564  }
1565  };
1566 }
1567 
1568 void HvxSelector::select(SDNode *ISelN) {
1569  // What's important here is to select the right set of nodes. The main
1570  // selection algorithm loops over nodes in a topological order, i.e. users
1571  // are visited before their operands.
1572  //
1573  // It is an error to have an unselected node with a selected operand, and
1574  // there is an assertion in the main selector code to enforce that.
1575  //
1576  // Such a situation could occur if we selected a node, which is both a
1577  // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1578  // node in the DAG.
1579  assert(ISelN->getOpcode() == HexagonISD::ISEL);
1580  SDNode *N0 = ISelN->getOperand(0).getNode();
1581  if (N0->isMachineOpcode()) {
1582  ISel.ReplaceNode(ISelN, N0);
1583  return;
1584  }
1585 
1586  // There could have been nodes created (i.e. inserted into the DAG)
1587  // that are now dead. Remove them, in case they use any of the nodes
1588  // to select (and make them look shared).
1589  DAG.RemoveDeadNodes();
1590 
1591  SetVector<SDNode*> SubNodes, TmpQ;
1592  std::map<SDNode*,unsigned> NumOps;
1593 
1594  // Don't want to select N0 if it's shared with another node, except if
1595  // it's shared with other ISELs.
1596  auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1597  if (llvm::all_of(N0->uses(), IsISelN))
1598  SubNodes.insert(N0);
1599 
1600  auto InSubNodes = [&SubNodes](SDNode *T) { return SubNodes.count(T); };
1601  for (unsigned I = 0; I != SubNodes.size(); ++I) {
1602  SDNode *S = SubNodes[I];
1603  unsigned OpN = 0;
1604  // Only add subnodes that are only reachable from N0.
1605  for (SDValue Op : S->ops()) {
1606  SDNode *O = Op.getNode();
1607  if (llvm::all_of(O->uses(), InSubNodes)) {
1608  SubNodes.insert(O);
1609  ++OpN;
1610  }
1611  }
1612  NumOps.insert({S, OpN});
1613  if (OpN == 0)
1614  TmpQ.insert(S);
1615  }
1616 
1617  for (unsigned I = 0; I != TmpQ.size(); ++I) {
1618  SDNode *S = TmpQ[I];
1619  for (SDNode *U : S->uses()) {
1620  if (U == ISelN)
1621  continue;
1622  auto F = NumOps.find(U);
1623  assert(F != NumOps.end());
1624  if (F->second > 0 && !--F->second)
1625  TmpQ.insert(F->first);
1626  }
1627  }
1628 
1629  // Remove the marker.
1630  ISel.ReplaceNode(ISelN, N0);
1631 
1632  assert(SubNodes.size() == TmpQ.size());
1633  NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1634 
1635  Deleter DUQ(DAG, Queue);
1636  for (SDNode *S : reverse(Queue)) {
1637  if (S == nullptr)
1638  continue;
1639  DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1640  ISel.Select(S);
1641  }
1642 }
1643 
1644 bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1645  MVT ResTy, SDValue Va, SDValue Vb,
1646  SDNode *N) {
1647  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1648  MVT ElemTy = ResTy.getVectorElementType();
1649  assert(ElemTy == MVT::i8);
1650  unsigned VecLen = Mask.size();
1651  bool HavePairs = (2*HwLen == VecLen);
1652  MVT SingleTy = getSingleVT(MVT::i8);
1653 
1654  // The prior attempts to handle this shuffle may have left a bunch of
1655  // dead nodes in the DAG (such as constants). These nodes will be added
1656  // at the end of DAG's node list, which at that point had already been
1657  // sorted topologically. In the main selection loop, the node list is
1658  // traversed backwards from the root node, which means that any new
1659  // nodes (from the end of the list) will not be visited.
1660  // Scalarization will replace the shuffle node with the scalarized
1661  // expression, and if that expression reused any if the leftoever (dead)
1662  // nodes, these nodes would not be selected (since the "local" selection
1663  // only visits nodes that are not in AllNodes).
1664  // To avoid this issue, remove all dead nodes from the DAG now.
1665 // DAG.RemoveDeadNodes();
1666 
1668  LLVMContext &Ctx = *DAG.getContext();
1669  MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1670  for (int I : Mask) {
1671  if (I < 0) {
1672  Ops.push_back(ISel.selectUndef(dl, LegalTy));
1673  continue;
1674  }
1675  SDValue Vec;
1676  unsigned M = I;
1677  if (M < VecLen) {
1678  Vec = Va;
1679  } else {
1680  Vec = Vb;
1681  M -= VecLen;
1682  }
1683  if (HavePairs) {
1684  if (M < HwLen) {
1685  Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1686  } else {
1687  Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1688  M -= HwLen;
1689  }
1690  }
1691  SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1692  SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1693  SDValue L = Lower.LowerOperation(Ex, DAG);
1694  assert(L.getNode());
1695  Ops.push_back(L);
1696  }
1697 
1698  SDValue LV;
1699  if (2*HwLen == VecLen) {
1700  SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1701  SDValue L0 = Lower.LowerOperation(B0, DAG);
1702  SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1704  // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1705  // functions may expect to be called only for illegal operations, so
1706  // make sure that they are not called for legal ones. Develop a better
1707  // mechanism for dealing with this.
1708  LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1709  } else {
1710  SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1711  LV = Lower.LowerOperation(BV, DAG);
1712  }
1713 
1714  assert(!N->use_empty());
1715  SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1716  ISel.ReplaceNode(N, IS.getNode());
1717  select(IS.getNode());
1718  DAG.RemoveDeadNodes();
1719  return true;
1720 }
1721 
1722 OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
1723  ResultStack &Results) {
1724  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1725  if (!Va.isValid() || !Vb.isValid())
1726  return OpRef::fail();
1727 
1728  // Contracting shuffles, i.e. instructions that always discard some bytes
1729  // from the operand vectors.
1730  //
1731  // V6_vshuff{e,o}b
1732  // V6_vdealb4w
1733  // V6_vpack{e,o}{b,h}
1734 
1735  int VecLen = SM.Mask.size();
1736  std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
1737  MVT ResTy = getSingleVT(MVT::i8);
1738 
1739  // The following shuffles only work for bytes and halfwords. This requires
1740  // the strip length to be 1 or 2.
1741  if (Strip.second != 1 && Strip.second != 2)
1742  return OpRef::fail();
1743 
1744  // The patterns for the shuffles, in terms of the starting offsets of the
1745  // consecutive strips (L = length of the strip, N = VecLen):
1746  //
1747  // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2
1748  // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2
1749  //
1750  // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2
1751  // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2
1752  //
1753  // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ...
1754 
1755  // The value of the element in the mask following the strip will decide
1756  // what kind of a shuffle this can be.
1757  int NextInMask = SM.Mask[Strip.second];
1758 
1759  // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask
1760  // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would
1761  // satisfy this.
1762  if (NextInMask < VecLen) {
1763  // vpack{e,o} or vdealb4w
1764  if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) {
1765  int N = VecLen;
1766  // Check if this is vdealb4w (L=1).
1767  for (int I = 0; I != N/4; ++I)
1768  if (SM.Mask[I] != 4*I)
1769  return OpRef::fail();
1770  for (int I = 0; I != N/4; ++I)
1771  if (SM.Mask[I+N/4] != 2 + 4*I)
1772  return OpRef::fail();
1773  for (int I = 0; I != N/4; ++I)
1774  if (SM.Mask[I+N/2] != N + 4*I)
1775  return OpRef::fail();
1776  for (int I = 0; I != N/4; ++I)
1777  if (SM.Mask[I+3*N/4] != N+2 + 4*I)
1778  return OpRef::fail();
1779  // Matched mask for vdealb4w.
1780  Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va});
1781  return OpRef::res(Results.top());
1782  }
1783 
1784  // Check if this is vpack{e,o}.
1785  int N = VecLen;
1786  int L = Strip.second;
1787  // Check if the first strip starts at 0 or at L.
1788  if (Strip.first != 0 && Strip.first != L)
1789  return OpRef::fail();
1790  // Examine the rest of the mask.
1791  for (int I = L; I < N; I += L) {
1792  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1793  // Check whether the mask element at the beginning of each strip
1794  // increases by 2L each time.
1795  if (S.first - Strip.first != 2*I)
1796  return OpRef::fail();
1797  // Check whether each strip is of the same length.
1798  if (S.second != unsigned(L))
1799  return OpRef::fail();
1800  }
1801 
1802  // Strip.first == 0 => vpacke
1803  // Strip.first == L => vpacko
1804  assert(Strip.first == 0 || Strip.first == L);
1805  using namespace Hexagon;
1806  NodeTemplate Res;
1807  Res.Opc = Strip.second == 1 // Number of bytes.
1808  ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob)
1809  : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh);
1810  Res.Ty = ResTy;
1811  Res.Ops = { Vb, Va };
1812  Results.push(Res);
1813  return OpRef::res(Results.top());
1814  }
1815 
1816  // Check if this is vshuff{e,o}.
1817  int N = VecLen;
1818  int L = Strip.second;
1819  std::pair<int,unsigned> PrevS = Strip;
1820  bool Flip = false;
1821  for (int I = L; I < N; I += L) {
1822  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1823  if (S.second != PrevS.second)
1824  return OpRef::fail();
1825  int Diff = Flip ? PrevS.first - S.first + 2*L
1826  : S.first - PrevS.first;
1827  if (Diff != N)
1828  return OpRef::fail();
1829  Flip ^= true;
1830  PrevS = S;
1831  }
1832  // Strip.first == 0 => vshuffe
1833  // Strip.first == L => vshuffo
1834  assert(Strip.first == 0 || Strip.first == L);
1835  using namespace Hexagon;
1836  NodeTemplate Res;
1837  Res.Opc = Strip.second == 1 // Number of bytes.
1838  ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob)
1839  : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh);
1840  Res.Ty = ResTy;
1841  Res.Ops = { Vb, Va };
1842  Results.push(Res);
1843  return OpRef::res(Results.top());
1844 }
1845 
1846 OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1847  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1848  // Expanding shuffles (using all elements and inserting into larger vector):
1849  //
1850  // V6_vunpacku{b,h} [*]
1851  //
1852  // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
1853  //
1854  // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
1855  // they are not shuffles.
1856  //
1857  // The argument is a single vector.
1858 
1859  int VecLen = SM.Mask.size();
1860  assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
1861 
1862  std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
1863 
1864  // The patterns for the unpacks, in terms of the starting offsets of the
1865  // consecutive strips (L = length of the strip, N = VecLen):
1866  //
1867  // vunpacku: 0, -1, L, -1, 2L, -1 ...
1868 
1869  if (Strip.first != 0)
1870  return OpRef::fail();
1871 
1872  // The vunpackus only handle byte and half-word.
1873  if (Strip.second != 1 && Strip.second != 2)
1874  return OpRef::fail();
1875 
1876  int N = VecLen;
1877  int L = Strip.second;
1878 
1879  // First, check the non-ignored strips.
1880  for (int I = 2*L; I < N; I += 2*L) {
1881  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1882  if (S.second != unsigned(L))
1883  return OpRef::fail();
1884  if (2*S.first != I)
1885  return OpRef::fail();
1886  }
1887  // Check the -1s.
1888  for (int I = L; I < N; I += 2*L) {
1889  auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
1890  if (S.first != -1 || S.second != unsigned(L))
1891  return OpRef::fail();
1892  }
1893 
1894  unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
1895  : Hexagon::V6_vunpackuh;
1896  Results.push(Opc, getPairVT(MVT::i8), {Va});
1897  return OpRef::res(Results.top());
1898 }
1899 
1900 OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1901  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1902  // V6_vdeal{b,h}
1903  // V6_vshuff{b,h}
1904 
1905  // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
1906  // V6_vshuffvdd (V6_vshuff)
1907  // V6_dealvdd (V6_vdeal)
1908 
1909  int VecLen = SM.Mask.size();
1910  assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
1911  unsigned LogLen = Log2_32(VecLen);
1912  unsigned HwLog = Log2_32(HwLen);
1913  // The result length must be the same as the length of a single vector,
1914  // or a vector pair.
1915  assert(LogLen == HwLog || LogLen == HwLog+1);
1916  bool HavePairs = LogLen == HwLog+1;
1917 
1918  if (!isPermutation(SM.Mask))
1919  return OpRef::fail();
1920 
1921  SmallVector<unsigned,8> Perm(LogLen);
1922 
1923  // Check if this could be a perfect shuffle, or a combination of perfect
1924  // shuffles.
1925  //
1926  // Consider this permutation (using hex digits to make the ASCII diagrams
1927  // easier to read):
1928  // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
1929  // This is a "deal" operation: divide the input into two halves, and
1930  // create the output by picking elements by alternating between these two
1931  // halves:
1932  // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
1933  // 8 9 A B C D E F
1934  //
1935  // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
1936  // a somwehat different mechanism that could be used to perform shuffle/
1937  // deal operations: a 2x2 transpose.
1938  // Consider the halves of inputs again, they can be interpreted as a 2x8
1939  // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
1940  // together. Now, when considering 2 elements at a time, it will be a 2x4
1941  // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
1942  // 01 23 45 67
1943  // 89 AB CD EF
1944  // With groups of 4, this will become a single 2x2 matrix, and so on.
1945  //
1946  // The 2x2 transpose instruction works by transposing each of the 2x2
1947  // matrices (or "sub-matrices"), given a specific group size. For example,
1948  // if the group size is 1 (i.e. each element is its own group), there
1949  // will be four transposes of the four 2x2 matrices that form the 2x8.
1950  // For example, with the inputs as above, the result will be:
1951  // 0 8 2 A 4 C 6 E
1952  // 1 9 3 B 5 D 7 F
1953  // Now, this result can be tranposed again, but with the group size of 2:
1954  // 08 19 4C 5D
1955  // 2A 3B 6E 7F
1956  // If we then transpose that result, but with the group size of 4, we get:
1957  // 0819 2A3B
1958  // 4C5D 6E7F
1959  // If we concatenate these two rows, it will be
1960  // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
1961  // which is the same as the "deal" [*] above.
1962  //
1963  // In general, a "deal" of individual elements is a series of 2x2 transposes,
1964  // with changing group size. HVX has two instructions:
1965  // Vdd = V6_vdealvdd Vu, Vv, Rt
1966  // Vdd = V6_shufvdd Vu, Vv, Rt
1967  // that perform exactly that. The register Rt controls which transposes are
1968  // going to happen: a bit at position n (counting from 0) indicates that a
1969  // transpose with a group size of 2^n will take place. If multiple bits are
1970  // set, multiple transposes will happen: vdealvdd will perform them starting
1971  // with the largest group size, vshuffvdd will do them in the reverse order.
1972  //
1973  // The main observation is that each 2x2 transpose corresponds to swapping
1974  // columns of bits in the binary representation of the values.
1975  //
1976  // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
1977  // in a given column. The * denote the columns that will be swapped.
1978  // The transpose with the group size 2^n corresponds to swapping columns
1979  // 3 (the highest log) and log2(n):
1980  //
1981  // 3 2 1 0 0 2 1 3 0 2 3 1
1982  // * * * * * *
1983  // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1984  // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
1985  // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
1986  // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
1987  // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
1988  // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
1989  // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
1990  // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
1991  // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
1992  // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
1993  // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
1994  // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
1995  // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
1996  // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
1997  // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
1998  // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
1999 
2000  // There is one special case that is not a perfect shuffle, but
2001  // can be turned into one easily: when the shuffle operates on
2002  // a vector pair, but the two vectors in the pair are swapped.
2003  // The code below that identifies perfect shuffles will reject
2004  // it, unless the order is reversed.
2005  SmallVector<int,128> MaskStorage(SM.Mask.begin(), SM.Mask.end());
2006  bool InvertedPair = false;
2007  if (HavePairs && SM.Mask[0] >= int(HwLen)) {
2008  for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
2009  int M = SM.Mask[i];
2010  MaskStorage[i] = M >= int(HwLen) ? M-HwLen : M+HwLen;
2011  }
2012  InvertedPair = true;
2013  }
2014  ArrayRef<int> LocalMask(MaskStorage);
2015 
2016  auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) {
2017  unsigned X = Mask[0] ^ Mask[Num/2];
2018  // Check that the first half has the X's bits clear.
2019  if ((Mask[0] & X) != 0)
2020  return 0u;
2021  for (unsigned I = 1; I != Num/2; ++I) {
2022  if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X)
2023  return 0u;
2024  if ((Mask[I] & X) != 0)
2025  return 0u;
2026  }
2027  return X;
2028  };
2029 
2030  // Create a vector of log2's for each column: Perm[i] corresponds to
2031  // the i-th bit (lsb is 0).
2032  assert(VecLen > 2);
2033  for (unsigned I = VecLen; I >= 2; I >>= 1) {
2034  // Examine the initial segment of Mask of size I.
2035  unsigned X = XorPow2(LocalMask, I);
2036  if (!isPowerOf2_32(X))
2037  return OpRef::fail();
2038  // Check the other segments of Mask.
2039  for (int J = I; J < VecLen; J += I) {
2040  if (XorPow2(LocalMask.slice(J, I), I) != X)
2041  return OpRef::fail();
2042  }
2043  Perm[Log2_32(X)] = Log2_32(I)-1;
2044  }
2045 
2046  // Once we have Perm, represent it as cycles. Denote the maximum log2
2047  // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
2048  // written as (M a1 a2 a3 ... an). That cycle can be broken up into
2049  // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
2050  // order being from left to right. Any (contiguous) segment where the
2051  // values ai, ai+1...aj are either all increasing or all decreasing,
2052  // can be implemented via a single vshuffvdd/vdealvdd respectively.
2053  //
2054  // If there is a cycle (a1 a2 ... an) that does not involve M, it can
2055  // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
2056  // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
2057  // can be used to generate a sequence of vshuffvdd/vdealvdd.
2058  //
2059  // Example:
2060  // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
2061  // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
2062  // (4 0 1)(4 0)(4 2 3)(4 2).
2063  // It can then be expanded into swaps as
2064  // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
2065  // and broken up into "increasing" segments as
2066  // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
2067  // This is equivalent to
2068  // (4 0 1)(4 0 2 3)(4 2),
2069  // which can be implemented as 3 vshufvdd instructions.
2070 
2071  using CycleType = SmallVector<unsigned,8>;
2072  std::set<CycleType> Cycles;
2073  std::set<unsigned> All;
2074 
2075  for (unsigned I : Perm)
2076  All.insert(I);
2077 
2078  // If the cycle contains LogLen-1, move it to the front of the cycle.
2079  // Otherwise, return the cycle unchanged.
2080  auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
2081  unsigned LogPos, N = C.size();
2082  for (LogPos = 0; LogPos != N; ++LogPos)
2083  if (C[LogPos] == LogLen-1)
2084  break;
2085  if (LogPos == N)
2086  return C;
2087 
2088  CycleType NewC(C.begin()+LogPos, C.end());
2089  NewC.append(C.begin(), C.begin()+LogPos);
2090  return NewC;
2091  };
2092 
2093  auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
2094  // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
2095  // for bytes zero is included, for halfwords is not.
2096  if (Cs.size() != 1)
2097  return 0u;
2098  const CycleType &C = *Cs.begin();
2099  if (C[0] != Len-1)
2100  return 0u;
2101  int D = Len - C.size();
2102  if (D != 0 && D != 1)
2103  return 0u;
2104 
2105  bool IsDeal = true, IsShuff = true;
2106  for (unsigned I = 1; I != Len-D; ++I) {
2107  if (C[I] != Len-1-I)
2108  IsDeal = false;
2109  if (C[I] != I-(1-D)) // I-1, I
2110  IsShuff = false;
2111  }
2112  // At most one, IsDeal or IsShuff, can be non-zero.
2113  assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
2114  static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh };
2115  static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh };
2116  return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
2117  };
2118 
2119  while (!All.empty()) {
2120  unsigned A = *All.begin();
2121  All.erase(A);
2122  CycleType C;
2123  C.push_back(A);
2124  for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
2125  C.push_back(B);
2126  All.erase(B);
2127  }
2128  if (C.size() <= 1)
2129  continue;
2130  Cycles.insert(canonicalize(C));
2131  }
2132 
2133  MVT SingleTy = getSingleVT(MVT::i8);
2135 
2136  // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
2137  if (unsigned(VecLen) == HwLen) {
2138  if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
2139  Results.push(SingleOpc, SingleTy, {Va});
2140  return OpRef::res(Results.top());
2141  }
2142  }
2143 
2144  // From the cycles, construct the sequence of values that will
2145  // then form the control values for vdealvdd/vshuffvdd, i.e.
2146  // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
2147  // This essentially strips the M value from the cycles where
2148  // it's present, and performs the insertion of M (then stripping)
2149  // for cycles without M (as described in an earlier comment).
2150  SmallVector<unsigned,8> SwapElems;
2151  // When the input is extended (i.e. single vector becomes a pair),
2152  // this is done by using an "undef" vector as the second input.
2153  // However, then we get
2154  // input 1: GOODBITS
2155  // input 2: ........
2156  // but we need
2157  // input 1: ....BITS
2158  // input 2: ....GOOD
2159  // Then at the end, this needs to be undone. To accomplish this,
2160  // artificially add "LogLen-1" at both ends of the sequence.
2161  if (!HavePairs)
2162  SwapElems.push_back(LogLen-1);
2163  for (const CycleType &C : Cycles) {
2164  // Do the transformation: (a1..an) -> (M a1..an)(M a1).
2165  unsigned First = (C[0] == LogLen-1) ? 1 : 0;
2166  SwapElems.append(C.begin()+First, C.end());
2167  if (First == 0)
2168  SwapElems.push_back(C[0]);
2169  }
2170  if (!HavePairs)
2171  SwapElems.push_back(LogLen-1);
2172 
2173  const SDLoc &dl(Results.InpNode);
2174  OpRef Arg = HavePairs ? Va
2175  : concats(Va, OpRef::undef(SingleTy), Results);
2176  if (InvertedPair)
2177  Arg = concats(OpRef::hi(Arg), OpRef::lo(Arg), Results);
2178 
2179  for (unsigned I = 0, E = SwapElems.size(); I != E; ) {
2180  bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1];
2181  unsigned S = (1u << SwapElems[I]);
2182  if (I < E-1) {
2183  while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1]))
2184  S |= 1u << SwapElems[I];
2185  // The above loop will not add a bit for the final SwapElems[I+1],
2186  // so add it here.
2187  S |= 1u << SwapElems[I];
2188  }
2189  ++I;
2190 
2191  NodeTemplate Res;
2192  Results.push(Hexagon::A2_tfrsi, MVT::i32, {getConst32(S, dl)});
2193  Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
2194  Res.Ty = PairTy;
2195  Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) };
2196  Results.push(Res);
2197  Arg = OpRef::res(Results.top());
2198  }
2199 
2200  return HavePairs ? Arg : OpRef::lo(Arg);
2201 }
2202 
2203 OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
2204  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
2205  // Butterfly shuffles.
2206  //
2207  // V6_vdelta
2208  // V6_vrdelta
2209  // V6_vror
2210 
2211  // The assumption here is that all elements picked by Mask are in the
2212  // first operand to the vector_shuffle. This assumption is enforced
2213  // by the caller.
2214 
2215  MVT ResTy = getSingleVT(MVT::i8);
2216  PermNetwork::Controls FC, RC;
2217  const SDLoc &dl(Results.InpNode);
2218  int VecLen = SM.Mask.size();
2219 
2220  for (int M : SM.Mask) {
2221  if (M != -1 && M >= VecLen)
2222  return OpRef::fail();
2223  }
2224 
2225  // Try the deltas/benes for both single vectors and vector pairs.
2226  ForwardDeltaNetwork FN(SM.Mask);
2227  if (FN.run(FC)) {
2228  SDValue Ctl = getVectorConstant(FC, dl);
2229  Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2230  return OpRef::res(Results.top());
2231  }
2232 
2233  // Try reverse delta.
2234  ReverseDeltaNetwork RN(SM.Mask);
2235  if (RN.run(RC)) {
2236  SDValue Ctl = getVectorConstant(RC, dl);
2237  Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2238  return OpRef::res(Results.top());
2239  }
2240 
2241  // Do Benes.
2242  BenesNetwork BN(SM.Mask);
2243  if (BN.run(FC, RC)) {
2244  SDValue CtlF = getVectorConstant(FC, dl);
2245  SDValue CtlR = getVectorConstant(RC, dl);
2246  Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2247  Results.push(Hexagon::V6_vrdelta, ResTy,
2248  {OpRef::res(-1), OpRef(CtlR)});
2249  return OpRef::res(Results.top());
2250  }
2251 
2252  return OpRef::fail();
2253 }
2254 
2255 SDValue HvxSelector::getConst32(int Val, const SDLoc &dl) {
2256  return DAG.getTargetConstant(Val, dl, MVT::i32);
2257 }
2258 
2259 SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2260  const SDLoc &dl) {
2262  for (uint8_t C : Data)
2263  Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2264  MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2265  SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2266  SDValue LV = Lower.LowerOperation(BV, DAG);
2267  DAG.RemoveDeadNode(BV.getNode());
2268  return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2269 }
2270 
2272  DEBUG_WITH_TYPE("isel", {
2273  dbgs() << "Starting " << __func__ << " on node:\n";
2274  N->dump(&DAG);
2275  });
2276  MVT ResTy = N->getValueType(0).getSimpleVT();
2277  // Assume that vector shuffles operate on vectors of bytes.
2278  assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2279 
2280  auto *SN = cast<ShuffleVectorSDNode>(N);
2281  std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2282  // This shouldn't really be necessary. Is it?
2283  for (int &Idx : Mask)
2284  if (Idx != -1 && Idx < 0)
2285  Idx = -1;
2286 
2287  unsigned VecLen = Mask.size();
2288  bool HavePairs = (2*HwLen == VecLen);
2289  assert(ResTy.getSizeInBits() / 8 == VecLen);
2290 
2291  // Vd = vector_shuffle Va, Vb, Mask
2292  //
2293 
2294  bool UseLeft = false, UseRight = false;
2295  for (unsigned I = 0; I != VecLen; ++I) {
2296  if (Mask[I] == -1)
2297  continue;
2298  unsigned Idx = Mask[I];
2299  assert(Idx < 2*VecLen);
2300  if (Idx < VecLen)
2301  UseLeft = true;
2302  else
2303  UseRight = true;
2304  }
2305 
2306  DEBUG_WITH_TYPE("isel", {
2307  dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2308  << UseLeft << " UseRight=" << UseRight << " HavePairs="
2309  << HavePairs << '\n';
2310  });
2311  // If the mask is all -1's, generate "undef".
2312  if (!UseLeft && !UseRight) {
2313  ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2314  return;
2315  }
2316 
2317  SDValue Vec0 = N->getOperand(0);
2318  SDValue Vec1 = N->getOperand(1);
2319  ResultStack Results(SN);
2320  Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2321  Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2322  OpRef Va = OpRef::res(Results.top()-1);
2323  OpRef Vb = OpRef::res(Results.top());
2324 
2325  OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2326  : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2327 
2328  bool Done = Res.isValid();
2329  if (Done) {
2330  // Make sure that Res is on the stack before materializing.
2331  Results.push(TargetOpcode::COPY, ResTy, {Res});
2332  materialize(Results);
2333  } else {
2334  Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2335  }
2336 
2337  if (!Done) {
2338 #ifndef NDEBUG
2339  dbgs() << "Unhandled shuffle:\n";
2340  SN->dumpr(&DAG);
2341 #endif
2342  llvm_unreachable("Failed to select vector shuffle");
2343  }
2344 }
2345 
2347  // If this is a rotation by less than 8, use V6_valignbi.
2348  MVT Ty = N->getValueType(0).getSimpleVT();
2349  const SDLoc &dl(N);
2350  SDValue VecV = N->getOperand(0);
2351  SDValue RotV = N->getOperand(1);
2352  SDNode *NewN = nullptr;
2353 
2354  if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2355  unsigned S = CN->getZExtValue() % HST.getVectorLength();
2356  if (S == 0) {
2357  NewN = VecV.getNode();
2358  } else if (isUInt<3>(S)) {
2359  NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2360  {VecV, VecV, getConst32(S, dl)});
2361  }
2362  }
2363 
2364  if (!NewN)
2365  NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2366 
2367  ISel.ReplaceNode(N, NewN);
2368 }
2369 
2371  SDValue Vv = N->getOperand(0);
2372  SDValue Vu = N->getOperand(1);
2373  SDValue Rt = N->getOperand(2);
2374  SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2375  N->getValueType(0), {Vv, Vu, Rt});
2376  ISel.ReplaceNode(N, NewN);
2377  DAG.RemoveDeadNode(N);
2378 }
2379 
2380 void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2381  HvxSelector(*this, *CurDAG).selectShuffle(N);
2382 }
2383 
2384 void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2385  HvxSelector(*this, *CurDAG).selectRor(N);
2386 }
2387 
2388 void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2389  HvxSelector(*this, *CurDAG).selectVAlign(N);
2390 }
2391 
2393  const SDLoc &dl(N);
2394  SDValue Chain = N->getOperand(0);
2395  SDValue Address = N->getOperand(2);
2396  SDValue Predicate = N->getOperand(3);
2397  SDValue Base = N->getOperand(4);
2398  SDValue Modifier = N->getOperand(5);
2399  SDValue Offset = N->getOperand(6);
2400 
2401  unsigned Opcode;
2402  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2403  switch (IntNo) {
2404  default:
2405  llvm_unreachable("Unexpected HVX gather intrinsic.");
2406  case Intrinsic::hexagon_V6_vgathermhq:
2407  case Intrinsic::hexagon_V6_vgathermhq_128B:
2408  Opcode = Hexagon::V6_vgathermhq_pseudo;
2409  break;
2410  case Intrinsic::hexagon_V6_vgathermwq:
2411  case Intrinsic::hexagon_V6_vgathermwq_128B:
2412  Opcode = Hexagon::V6_vgathermwq_pseudo;
2413  break;
2414  case Intrinsic::hexagon_V6_vgathermhwq:
2415  case Intrinsic::hexagon_V6_vgathermhwq_128B:
2416  Opcode = Hexagon::V6_vgathermhwq_pseudo;
2417  break;
2418  }
2419 
2420  SDVTList VTs = CurDAG->getVTList(MVT::Other);
2421  SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain };
2422  SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2423 
2424  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2425  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2426 
2427  ReplaceNode(N, Result);
2428 }
2429 
2431  const SDLoc &dl(N);
2432  SDValue Chain = N->getOperand(0);
2433  SDValue Address = N->getOperand(2);
2434  SDValue Base = N->getOperand(3);
2435  SDValue Modifier = N->getOperand(4);
2436  SDValue Offset = N->getOperand(5);
2437 
2438  unsigned Opcode;
2439  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2440  switch (IntNo) {
2441  default:
2442  llvm_unreachable("Unexpected HVX gather intrinsic.");
2443  case Intrinsic::hexagon_V6_vgathermh:
2444  case Intrinsic::hexagon_V6_vgathermh_128B:
2445  Opcode = Hexagon::V6_vgathermh_pseudo;
2446  break;
2447  case Intrinsic::hexagon_V6_vgathermw:
2448  case Intrinsic::hexagon_V6_vgathermw_128B:
2449  Opcode = Hexagon::V6_vgathermw_pseudo;
2450  break;
2451  case Intrinsic::hexagon_V6_vgathermhw:
2452  case Intrinsic::hexagon_V6_vgathermhw_128B:
2453  Opcode = Hexagon::V6_vgathermhw_pseudo;
2454  break;
2455  }
2456 
2457  SDVTList VTs = CurDAG->getVTList(MVT::Other);
2458  SDValue Ops[] = { Address, Base, Modifier, Offset, Chain };
2459  SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2460 
2461  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2462  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2463 
2464  ReplaceNode(N, Result);
2465 }
2466 
2468  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2469  SDNode *Result;
2470  switch (IID) {
2471  case Intrinsic::hexagon_V6_vaddcarry: {
2472  std::array<SDValue, 3> Ops = {
2473  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2474  SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2475  Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2476  break;
2477  }
2478  case Intrinsic::hexagon_V6_vaddcarry_128B: {
2479  std::array<SDValue, 3> Ops = {
2480  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2481  SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2482  Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2483  break;
2484  }
2485  case Intrinsic::hexagon_V6_vsubcarry: {
2486  std::array<SDValue, 3> Ops = {
2487  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2488  SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2489  Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2490  break;
2491  }
2492  case Intrinsic::hexagon_V6_vsubcarry_128B: {
2493  std::array<SDValue, 3> Ops = {
2494  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2495  SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2496  Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2497  break;
2498  }
2499  default:
2500  llvm_unreachable("Unexpected HVX dual output intrinsic.");
2501  }
2502  ReplaceUses(N, Result);
2503  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
2504  ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
2505  CurDAG->RemoveDeadNode(N);
2506 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::alignTo
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
llvm::HexagonSubtarget::getVectorLength
unsigned getVectorLength() const
Definition: HexagonSubtarget.h:274
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:519
B1
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:491
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MVT::v128i1
@ v128i1
Definition: MachineValueType.h:71
Pass
print lazy value Lazy Value Info Printer Pass
Definition: LazyValueInfo.cpp:1966
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1086
llvm::HvxSelector::Lower
const HexagonTargetLowering & Lower
Definition: HexagonISelDAGToDAGHVX.cpp:812
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::NodeSet::end
iterator end()
Definition: MachinePipeliner.h:422
llvm::Function::empty
bool empty() const
Definition: Function.h:739
T
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::Function::print
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4493
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:519
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
llvm::BitVector::set
BitVector & set()
Definition: BitVector.h:343
isIdentity
static bool isIdentity(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:919
getHexagonLowering
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:803
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::HvxSelector::HwLen
const unsigned HwLen
Definition: HexagonISelDAGToDAGHVX.cpp:816
Ignore
ReachingDefAnalysis InstSet InstSet & Ignore
Definition: ARMLowOverheadLoops.cpp:545
llvm::SmallVector< unsigned, 4 >
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:199
llvm::HexagonTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: HexagonISelLowering.cpp:3154
llvm::HvxSelector::selectRor
void selectRor(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2346
PairTy
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Definition: MachineModuleInfoImpls.cpp:30
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::HvxSelector::selectShuffle
void selectShuffle(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2271
llvm::BitVector::set_bits
iterator_range< const_set_bits_iterator > set_bits() const
Definition: BitVector.h:132
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
push
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
Definition: BitstreamRemarkSerializer.cpp:23
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::MachO::Invalid
@ Invalid
Invalid file type.
Definition: InterfaceFile.h:59
llvm::MemOp
Definition: TargetLowering.h:112
llvm::coro::ABI::Switch
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
Shift
bool Shift
Definition: README.txt:468
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:333
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:128
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:847
llvm::HexagonDAGToDAGISel
Definition: HexagonISelDAGToDAG.h:29
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:830
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
HexagonTargetMachine.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
getHexagonSubtarget
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:806
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:188
BaseType
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Coloring
Stack Slot Coloring
Definition: StackSlotColoring.cpp:140
llvm::Data
@ Data
Definition: SIMachineScheduler.h:55
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
llvm::ArrayRef::data
const T * data() const
Definition: ArrayRef.h:162
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:912
HexagonISelDAGToDAG.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:33
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
CommandLine.h
getInputSegmentList
static SmallVector< unsigned, 4 > getInputSegmentList(ShuffleMask SM, unsigned SegLen)
Definition: HexagonISelDAGToDAGHVX.cpp:928
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:629
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:307
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::HexagonISD::ISEL
@ ISEL
Definition: HexagonISelLowering.h:95
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1373
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:321
llvm::MVT::v64i1
@ v64i1
Definition: MachineValueType.h:70
llvm::AArch64CC::HS
@ HS
Definition: AArch64BaseInfo.h:257
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:227
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:789
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::SelectionDAG::DAGNodeDeletedListener
Definition: SelectionDAG.h:314
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::BitVector
Definition: BitVector.h:74
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:311
llvm::None
const NoneType None
Definition: None.h:23
llvm::TargetLoweringBase::getTypeToTransformTo
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:942
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon.h
llvm::HvxSelector
Definition: HexagonISelDAGToDAGHVX.cpp:811
llvm::NodeSet::begin
iterator begin()
Definition: MachinePipeliner.h:421
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:197
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::SignExtend32
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:761
getOutputSegmentMap
static SmallVector< unsigned, 4 > getOutputSegmentMap(ShuffleMask SM, unsigned SegLen)
Definition: HexagonISelDAGToDAGHVX.cpp:949
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:884
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:204
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::HexagonDAGToDAGISel::SelectV65GatherPred
void SelectV65GatherPred(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2392
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
fail
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
Definition: BPFISelLowering.cpp:38
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:904
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8316
size
i< reg-> size
Definition: README.txt:166
llvm::HighlightColor::Address
@ Address
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:850
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::HvxSelector::HST
const HexagonSubtarget & HST
Definition: HexagonISelDAGToDAGHVX.cpp:815
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:860
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8989
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::SDNode::dumpr
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
Definition: SelectionDAGDumper.cpp:996
HexagonISelLowering.h
packSegmentMask
static void packSegmentMask(ArrayRef< int > Mask, ArrayRef< unsigned > OutSegMap, unsigned SegLen, MutableArrayRef< int > PackedMask)
Definition: HexagonISelDAGToDAGHVX.cpp:984
llvm::SDNode::isMachineOpcode
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
Definition: SelectionDAGNodes.h:681
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition: MachineValueType.h:1177
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
SelectionDAGISel.h
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::HexagonTargetLowering
Definition: HexagonISelLowering.h:105
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::HvxSelector::ISel
HexagonDAGToDAGISel & ISel
Definition: HexagonISelDAGToDAGHVX.cpp:813
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:109
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::object::Comma
@ Comma
Definition: COFFModuleDefinition.cpp:37
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
llvm::MachO::All
@ All
Definition: InterfaceFile.h:73
j
return j(j<< 16)
llvm::MipsISD::Mult
@ Mult
Definition: MipsISelLowering.h:132
findStrip
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
Definition: HexagonISelDAGToDAGHVX.cpp:899
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::HvxSelector::getPairVT
MVT getPairVT(MVT ElemTy) const
Definition: HexagonISelDAGToDAGHVX.cpp:828
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::ArrayRef::take_front
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:228
llvm::HexagonDAGToDAGISel::SelectV65Gather
void SelectV65Gather(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2430
llvm::c_str
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Definition: WindowsSupport.h:193
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:9107
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
isPermutation
static bool isPermutation(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:1007
Predicate
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:216
llvm::HvxSelector::selectVAlign
void selectVAlign(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2370
MachineInstrBuilder.h
llvm::HvxSelector::DAG
SelectionDAG & DAG
Definition: HexagonISelDAGToDAGHVX.cpp:814
List
const NodeList & List
Definition: RDFGraph.cpp:201
N
#define N
splitMask
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
Definition: HexagonISelDAGToDAGHVX.cpp:881
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::HvxSelector::HvxSelector
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:818
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
llvm::HexagonDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: HexagonISelDAGToDAG.cpp:883
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:345
RN
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s src2 rnd ← FPSCR RN
Definition: README_P9.txt:262
llvm::HvxSelector::getSingleVT
MVT getSingleVT(MVT ElemTy) const
Definition: HexagonISelDAGToDAGHVX.cpp:822
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
llvm::ShuffleVectorSDNode::commuteMask
static void commuteMask(MutableArrayRef< int > Mask)
Change values in a shuffle permute mask assuming the two vector operands have swapped position.
Definition: SelectionDAGNodes.h:1538
Debug.h
llvm::SrcOp
Definition: MachineIRBuilder.h:119
SetVector.h
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::HexagonDAGToDAGISel::SelectHVXDualOutput
void SelectHVXDualOutput(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2467
llvm::HvxSelector::getBoolVT
MVT getBoolVT() const
Definition: HexagonISelDAGToDAGHVX.cpp:834
llvm::Function::size
size_t size() const
Definition: Function.h:738
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::irsymtab::build
Error build(ArrayRef< Module * > Mods, SmallVector< char, 0 > &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc)
Fills in Symtab and StrtabBuilder with a valid symbol and string table for Mods.
Definition: IRSymtab.cpp:356