LLVM  13.0.0git
HexagonISelDAGToDAGHVX.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAGHVX.cpp ----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Hexagon.h"
10 #include "HexagonISelDAGToDAG.h"
11 #include "HexagonISelLowering.h"
12 #include "HexagonTargetMachine.h"
13 #include "llvm/ADT/SetVector.h"
16 #include "llvm/IR/Intrinsics.h"
17 #include "llvm/IR/IntrinsicsHexagon.h"
19 #include "llvm/Support/Debug.h"
20 
21 #include <deque>
22 #include <map>
23 #include <set>
24 #include <utility>
25 #include <vector>
26 
27 #define DEBUG_TYPE "hexagon-isel"
28 
29 using namespace llvm;
30 
31 namespace {
32 
33 // --------------------------------------------------------------------
34 // Implementation of permutation networks.
35 
36 // Implementation of the node routing through butterfly networks:
37 // - Forward delta.
38 // - Reverse delta.
39 // - Benes.
40 //
41 //
42 // Forward delta network consists of log(N) steps, where N is the number
43 // of inputs. In each step, an input can stay in place, or it can get
44 // routed to another position[1]. The step after that consists of two
45 // networks, each half in size in terms of the number of nodes. In those
46 // terms, in the given step, an input can go to either the upper or the
47 // lower network in the next step.
48 //
49 // [1] Hexagon's vdelta/vrdelta allow an element to be routed to both
50 // positions as long as there is no conflict.
51 
52 // Here's a delta network for 8 inputs, only the switching routes are
53 // shown:
54 //
55 // Steps:
56 // |- 1 ---------------|- 2 -----|- 3 -|
57 //
58 // Inp[0] *** *** *** *** Out[0]
59 // \ / \ / \ /
60 // \ / \ / X
61 // \ / \ / / \
62 // Inp[1] *** \ / *** X *** *** Out[1]
63 // \ \ / / \ / \ /
64 // \ \ / / X X
65 // \ \ / / / \ / \
66 // Inp[2] *** \ \ / / *** X *** *** Out[2]
67 // \ \ X / / / \ \ /
68 // \ \ / \ / / / \ X
69 // \ X X / / \ / \
70 // Inp[3] *** \ / \ / \ / *** *** *** Out[3]
71 // \ X X X /
72 // \ / \ / \ / \ /
73 // X X X X
74 // / \ / \ / \ / \
75 // / X X X \
76 // Inp[4] *** / \ / \ / \ *** *** *** Out[4]
77 // / X X \ \ / \ /
78 // / / \ / \ \ \ / X
79 // / / X \ \ \ / / \
80 // Inp[5] *** / / \ \ *** X *** *** Out[5]
81 // / / \ \ \ / \ /
82 // / / \ \ X X
83 // / / \ \ / \ / \
84 // Inp[6] *** / \ *** X *** *** Out[6]
85 // / \ / \ \ /
86 // / \ / \ X
87 // / \ / \ / \
88 // Inp[7] *** *** *** *** Out[7]
89 //
90 //
91 // Reverse delta network is same as delta network, with the steps in
92 // the opposite order.
93 //
94 //
95 // Benes network is a forward delta network immediately followed by
96 // a reverse delta network.
97 
98 enum class ColorKind { None, Red, Black };
99 
100 // Graph coloring utility used to partition nodes into two groups:
101 // they will correspond to nodes routed to the upper and lower networks.
102 struct Coloring {
103  using Node = int;
104  using MapType = std::map<Node, ColorKind>;
105  static constexpr Node Ignore = Node(-1);
106 
107  Coloring(ArrayRef<Node> Ord) : Order(Ord) {
108  build();
109  if (!color())
110  Colors.clear();
111  }
112 
113  const MapType &colors() const {
114  return Colors;
115  }
116 
117  ColorKind other(ColorKind Color) {
118  if (Color == ColorKind::None)
119  return ColorKind::Red;
120  return Color == ColorKind::Red ? ColorKind::Black : ColorKind::Red;
121  }
122 
123  LLVM_DUMP_METHOD void dump() const;
124 
125 private:
126  ArrayRef<Node> Order;
127  MapType Colors;
128  std::set<Node> Needed;
129 
130  using NodeSet = std::set<Node>;
131  std::map<Node,NodeSet> Edges;
132 
133  Node conj(Node Pos) {
134  Node Num = Order.size();
135  return (Pos < Num/2) ? Pos + Num/2 : Pos - Num/2;
136  }
137 
138  ColorKind getColor(Node N) {
139  auto F = Colors.find(N);
140  return F != Colors.end() ? F->second : ColorKind::None;
141  }
142 
143  std::pair<bool, ColorKind> getUniqueColor(const NodeSet &Nodes);
144 
145  void build();
146  bool color();
147 };
148 } // namespace
149 
150 std::pair<bool, ColorKind> Coloring::getUniqueColor(const NodeSet &Nodes) {
151  auto Color = ColorKind::None;
152  for (Node N : Nodes) {
153  ColorKind ColorN = getColor(N);
154  if (ColorN == ColorKind::None)
155  continue;
156  if (Color == ColorKind::None)
157  Color = ColorN;
158  else if (Color != ColorKind::None && Color != ColorN)
159  return { false, ColorKind::None };
160  }
161  return { true, Color };
162 }
163 
164 void Coloring::build() {
165  // Add Order[P] and Order[conj(P)] to Edges.
166  for (unsigned P = 0; P != Order.size(); ++P) {
167  Node I = Order[P];
168  if (I != Ignore) {
169  Needed.insert(I);
170  Node PC = Order[conj(P)];
171  if (PC != Ignore && PC != I)
172  Edges[I].insert(PC);
173  }
174  }
175  // Add I and conj(I) to Edges.
176  for (unsigned I = 0; I != Order.size(); ++I) {
177  if (!Needed.count(I))
178  continue;
179  Node C = conj(I);
180  // This will create an entry in the edge table, even if I is not
181  // connected to any other node. This is necessary, because it still
182  // needs to be colored.
183  NodeSet &Is = Edges[I];
184  if (Needed.count(C))
185  Is.insert(C);
186  }
187 }
188 
189 bool Coloring::color() {
190  SetVector<Node> FirstQ;
191  auto Enqueue = [this,&FirstQ] (Node N) {
192  SetVector<Node> Q;
193  Q.insert(N);
194  for (unsigned I = 0; I != Q.size(); ++I) {
195  NodeSet &Ns = Edges[Q[I]];
196  Q.insert(Ns.begin(), Ns.end());
197  }
198  FirstQ.insert(Q.begin(), Q.end());
199  };
200  for (Node N : Needed)
201  Enqueue(N);
202 
203  for (Node N : FirstQ) {
204  if (Colors.count(N))
205  continue;
206  NodeSet &Ns = Edges[N];
207  auto P = getUniqueColor(Ns);
208  if (!P.first)
209  return false;
210  Colors[N] = other(P.second);
211  }
212 
213  // First, color nodes that don't have any dups.
214  for (auto E : Edges) {
215  Node N = E.first;
216  if (!Needed.count(conj(N)) || Colors.count(N))
217  continue;
218  auto P = getUniqueColor(E.second);
219  if (!P.first)
220  return false;
221  Colors[N] = other(P.second);
222  }
223 
224  // Now, nodes that are still uncolored. Since the graph can be modified
225  // in this step, create a work queue.
226  std::vector<Node> WorkQ;
227  for (auto E : Edges) {
228  Node N = E.first;
229  if (!Colors.count(N))
230  WorkQ.push_back(N);
231  }
232 
233  for (unsigned I = 0; I < WorkQ.size(); ++I) {
234  Node N = WorkQ[I];
235  NodeSet &Ns = Edges[N];
236  auto P = getUniqueColor(Ns);
237  if (P.first) {
238  Colors[N] = other(P.second);
239  continue;
240  }
241 
242  // Coloring failed. Split this node.
243  Node C = conj(N);
244  ColorKind ColorN = other(ColorKind::None);
245  ColorKind ColorC = other(ColorN);
246  NodeSet &Cs = Edges[C];
247  NodeSet CopyNs = Ns;
248  for (Node M : CopyNs) {
249  ColorKind ColorM = getColor(M);
250  if (ColorM == ColorC) {
251  // Connect M with C, disconnect M from N.
252  Cs.insert(M);
253  Edges[M].insert(C);
254  Ns.erase(M);
255  Edges[M].erase(N);
256  }
257  }
258  Colors[N] = ColorN;
259  Colors[C] = ColorC;
260  }
261 
262  // Explicitly assign "None" to all uncolored nodes.
263  for (unsigned I = 0; I != Order.size(); ++I)
264  if (Colors.count(I) == 0)
265  Colors[I] = ColorKind::None;
266 
267  return true;
268 }
269 
270 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
271 void Coloring::dump() const {
272  dbgs() << "{ Order: {";
273  for (unsigned I = 0; I != Order.size(); ++I) {
274  Node P = Order[I];
275  if (P != Ignore)
276  dbgs() << ' ' << P;
277  else
278  dbgs() << " -";
279  }
280  dbgs() << " }\n";
281  dbgs() << " Needed: {";
282  for (Node N : Needed)
283  dbgs() << ' ' << N;
284  dbgs() << " }\n";
285 
286  dbgs() << " Edges: {\n";
287  for (auto E : Edges) {
288  dbgs() << " " << E.first << " -> {";
289  for (auto N : E.second)
290  dbgs() << ' ' << N;
291  dbgs() << " }\n";
292  }
293  dbgs() << " }\n";
294 
295  auto ColorKindToName = [](ColorKind C) {
296  switch (C) {
297  case ColorKind::None:
298  return "None";
299  case ColorKind::Red:
300  return "Red";
301  case ColorKind::Black:
302  return "Black";
303  }
304  llvm_unreachable("all ColorKinds should be handled by the switch above");
305  };
306 
307  dbgs() << " Colors: {\n";
308  for (auto C : Colors)
309  dbgs() << " " << C.first << " -> " << ColorKindToName(C.second) << "\n";
310  dbgs() << " }\n}\n";
311 }
312 #endif
313 
314 namespace {
315 // Base class of for reordering networks. They don't strictly need to be
316 // permutations, as outputs with repeated occurrences of an input element
317 // are allowed.
318 struct PermNetwork {
319  using Controls = std::vector<uint8_t>;
320  using ElemType = int;
321  static constexpr ElemType Ignore = ElemType(-1);
322 
323  enum : uint8_t {
324  None,
325  Pass,
326  Switch
327  };
328  enum : uint8_t {
329  Forward,
330  Reverse
331  };
332 
333  PermNetwork(ArrayRef<ElemType> Ord, unsigned Mult = 1) {
334  Order.assign(Ord.data(), Ord.data()+Ord.size());
335  Log = 0;
336 
337  unsigned S = Order.size();
338  while (S >>= 1)
339  ++Log;
340 
341  Table.resize(Order.size());
342  for (RowType &Row : Table)
343  Row.resize(Mult*Log, None);
344  }
345 
346  void getControls(Controls &V, unsigned StartAt, uint8_t Dir) const {
347  unsigned Size = Order.size();
348  V.resize(Size);
349  for (unsigned I = 0; I != Size; ++I) {
350  unsigned W = 0;
351  for (unsigned L = 0; L != Log; ++L) {
352  unsigned C = ctl(I, StartAt+L) == Switch;
353  if (Dir == Forward)
354  W |= C << (Log-1-L);
355  else
356  W |= C << L;
357  }
358  assert(isUInt<8>(W));
359  V[I] = uint8_t(W);
360  }
361  }
362 
363  uint8_t ctl(ElemType Pos, unsigned Step) const {
364  return Table[Pos][Step];
365  }
366  unsigned size() const {
367  return Order.size();
368  }
369  unsigned steps() const {
370  return Log;
371  }
372 
373 protected:
374  unsigned Log;
375  std::vector<ElemType> Order;
376  using RowType = std::vector<uint8_t>;
377  std::vector<RowType> Table;
378 };
379 
380 struct ForwardDeltaNetwork : public PermNetwork {
381  ForwardDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
382 
383  bool run(Controls &V) {
384  if (!route(Order.data(), Table.data(), size(), 0))
385  return false;
386  getControls(V, 0, Forward);
387  return true;
388  }
389 
390 private:
391  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
392 };
393 
394 struct ReverseDeltaNetwork : public PermNetwork {
395  ReverseDeltaNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord) {}
396 
397  bool run(Controls &V) {
398  if (!route(Order.data(), Table.data(), size(), 0))
399  return false;
400  getControls(V, 0, Reverse);
401  return true;
402  }
403 
404 private:
405  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
406 };
407 
408 struct BenesNetwork : public PermNetwork {
409  BenesNetwork(ArrayRef<ElemType> Ord) : PermNetwork(Ord, 2) {}
410 
411  bool run(Controls &F, Controls &R) {
412  if (!route(Order.data(), Table.data(), size(), 0))
413  return false;
414 
415  getControls(F, 0, Forward);
416  getControls(R, Log, Reverse);
417  return true;
418  }
419 
420 private:
421  bool route(ElemType *P, RowType *T, unsigned Size, unsigned Step);
422 };
423 } // namespace
424 
425 bool ForwardDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
426  unsigned Step) {
427  bool UseUp = false, UseDown = false;
428  ElemType Num = Size;
429 
430  // Cannot use coloring here, because coloring is used to determine
431  // the "big" switch, i.e. the one that changes halves, and in a forward
432  // network, a color can be simultaneously routed to both halves in the
433  // step we're working on.
434  for (ElemType J = 0; J != Num; ++J) {
435  ElemType I = P[J];
436  // I is the position in the input,
437  // J is the position in the output.
438  if (I == Ignore)
439  continue;
440  uint8_t S;
441  if (I < Num/2)
442  S = (J < Num/2) ? Pass : Switch;
443  else
444  S = (J < Num/2) ? Switch : Pass;
445 
446  // U is the element in the table that needs to be updated.
447  ElemType U = (S == Pass) ? I : (I < Num/2 ? I+Num/2 : I-Num/2);
448  if (U < Num/2)
449  UseUp = true;
450  else
451  UseDown = true;
452  if (T[U][Step] != S && T[U][Step] != None)
453  return false;
454  T[U][Step] = S;
455  }
456 
457  for (ElemType J = 0; J != Num; ++J)
458  if (P[J] != Ignore && P[J] >= Num/2)
459  P[J] -= Num/2;
460 
461  if (Step+1 < Log) {
462  if (UseUp && !route(P, T, Size/2, Step+1))
463  return false;
464  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
465  return false;
466  }
467  return true;
468 }
469 
470 bool ReverseDeltaNetwork::route(ElemType *P, RowType *T, unsigned Size,
471  unsigned Step) {
472  unsigned Pets = Log-1 - Step;
473  bool UseUp = false, UseDown = false;
474  ElemType Num = Size;
475 
476  // In this step half-switching occurs, so coloring can be used.
477  Coloring G({P,Size});
478  const Coloring::MapType &M = G.colors();
479  if (M.empty())
480  return false;
481 
482  ColorKind ColorUp = ColorKind::None;
483  for (ElemType J = 0; J != Num; ++J) {
484  ElemType I = P[J];
485  // I is the position in the input,
486  // J is the position in the output.
487  if (I == Ignore)
488  continue;
489  ColorKind C = M.at(I);
490  if (C == ColorKind::None)
491  continue;
492  // During "Step", inputs cannot switch halves, so if the "up" color
493  // is still unknown, make sure that it is selected in such a way that
494  // "I" will stay in the same half.
495  bool InpUp = I < Num/2;
496  if (ColorUp == ColorKind::None)
497  ColorUp = InpUp ? C : G.other(C);
498  if ((C == ColorUp) != InpUp) {
499  // If I should go to a different half than where is it now, give up.
500  return false;
501  }
502 
503  uint8_t S;
504  if (InpUp) {
505  S = (J < Num/2) ? Pass : Switch;
506  UseUp = true;
507  } else {
508  S = (J < Num/2) ? Switch : Pass;
509  UseDown = true;
510  }
511  T[J][Pets] = S;
512  }
513 
514  // Reorder the working permutation according to the computed switch table
515  // for the last step (i.e. Pets).
516  for (ElemType J = 0, E = Size / 2; J != E; ++J) {
517  ElemType PJ = P[J]; // Current values of P[J]
518  ElemType PC = P[J+Size/2]; // and P[conj(J)]
519  ElemType QJ = PJ; // New values of P[J]
520  ElemType QC = PC; // and P[conj(J)]
521  if (T[J][Pets] == Switch)
522  QC = PJ;
523  if (T[J+Size/2][Pets] == Switch)
524  QJ = PC;
525  P[J] = QJ;
526  P[J+Size/2] = QC;
527  }
528 
529  for (ElemType J = 0; J != Num; ++J)
530  if (P[J] != Ignore && P[J] >= Num/2)
531  P[J] -= Num/2;
532 
533  if (Step+1 < Log) {
534  if (UseUp && !route(P, T, Size/2, Step+1))
535  return false;
536  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
537  return false;
538  }
539  return true;
540 }
541 
542 bool BenesNetwork::route(ElemType *P, RowType *T, unsigned Size,
543  unsigned Step) {
544  Coloring G({P,Size});
545  const Coloring::MapType &M = G.colors();
546  if (M.empty())
547  return false;
548  ElemType Num = Size;
549 
550  unsigned Pets = 2*Log-1 - Step;
551  bool UseUp = false, UseDown = false;
552 
553  // Both assignments, i.e. Red->Up and Red->Down are valid, but they will
554  // result in different controls. Let's pick the one where the first
555  // control will be "Pass".
556  ColorKind ColorUp = ColorKind::None;
557  for (ElemType J = 0; J != Num; ++J) {
558  ElemType I = P[J];
559  if (I == Ignore)
560  continue;
561  ColorKind C = M.at(I);
562  if (C == ColorKind::None)
563  continue;
564  if (ColorUp == ColorKind::None) {
565  ColorUp = (I < Num / 2) ? ColorKind::Red : ColorKind::Black;
566  }
567  unsigned CI = (I < Num/2) ? I+Num/2 : I-Num/2;
568  if (C == ColorUp) {
569  if (I < Num/2)
570  T[I][Step] = Pass;
571  else
572  T[CI][Step] = Switch;
573  T[J][Pets] = (J < Num/2) ? Pass : Switch;
574  UseUp = true;
575  } else { // Down
576  if (I < Num/2)
577  T[CI][Step] = Switch;
578  else
579  T[I][Step] = Pass;
580  T[J][Pets] = (J < Num/2) ? Switch : Pass;
581  UseDown = true;
582  }
583  }
584 
585  // Reorder the working permutation according to the computed switch table
586  // for the last step (i.e. Pets).
587  for (ElemType J = 0; J != Num/2; ++J) {
588  ElemType PJ = P[J]; // Current values of P[J]
589  ElemType PC = P[J+Num/2]; // and P[conj(J)]
590  ElemType QJ = PJ; // New values of P[J]
591  ElemType QC = PC; // and P[conj(J)]
592  if (T[J][Pets] == Switch)
593  QC = PJ;
594  if (T[J+Num/2][Pets] == Switch)
595  QJ = PC;
596  P[J] = QJ;
597  P[J+Num/2] = QC;
598  }
599 
600  for (ElemType J = 0; J != Num; ++J)
601  if (P[J] != Ignore && P[J] >= Num/2)
602  P[J] -= Num/2;
603 
604  if (Step+1 < Log) {
605  if (UseUp && !route(P, T, Size/2, Step+1))
606  return false;
607  if (UseDown && !route(P+Size/2, T+Size/2, Size/2, Step+1))
608  return false;
609  }
610  return true;
611 }
612 
613 // --------------------------------------------------------------------
614 // Support for building selection results (output instructions that are
615 // parts of the final selection).
616 
617 namespace {
618 struct OpRef {
619  OpRef(SDValue V) : OpV(V) {}
620  bool isValue() const { return OpV.getNode() != nullptr; }
621  bool isValid() const { return isValue() || !(OpN & Invalid); }
622  static OpRef res(int N) { return OpRef(Whole | (N & Index)); }
623  static OpRef fail() { return OpRef(Invalid); }
624 
625  static OpRef lo(const OpRef &R) {
626  assert(!R.isValue());
627  return OpRef(R.OpN & (Undef | Index | LoHalf));
628  }
629  static OpRef hi(const OpRef &R) {
630  assert(!R.isValue());
631  return OpRef(R.OpN & (Undef | Index | HiHalf));
632  }
633  static OpRef undef(MVT Ty) { return OpRef(Undef | Ty.SimpleTy); }
634 
635  // Direct value.
636  SDValue OpV = SDValue();
637 
638  // Reference to the operand of the input node:
639  // If the 31st bit is 1, it's undef, otherwise, bits 28..0 are the
640  // operand index:
641  // If bit 30 is set, it's the high half of the operand.
642  // If bit 29 is set, it's the low half of the operand.
643  unsigned OpN = 0;
644 
645  enum : unsigned {
646  Invalid = 0x10000000,
647  LoHalf = 0x20000000,
648  HiHalf = 0x40000000,
649  Whole = LoHalf | HiHalf,
650  Undef = 0x80000000,
651  Index = 0x0FFFFFFF, // Mask of the index value.
652  IndexBits = 28,
653  };
654 
656  void print(raw_ostream &OS, const SelectionDAG &G) const;
657 
658 private:
659  OpRef(unsigned N) : OpN(N) {}
660 };
661 
662 struct NodeTemplate {
663  NodeTemplate() = default;
664  unsigned Opc = 0;
665  MVT Ty = MVT::Other;
666  std::vector<OpRef> Ops;
667 
668  LLVM_DUMP_METHOD void print(raw_ostream &OS, const SelectionDAG &G) const;
669 };
670 
671 struct ResultStack {
672  ResultStack(SDNode *Inp)
673  : InpNode(Inp), InpTy(Inp->getValueType(0).getSimpleVT()) {}
674  SDNode *InpNode;
675  MVT InpTy;
676  unsigned push(const NodeTemplate &Res) {
677  List.push_back(Res);
678  return List.size()-1;
679  }
680  unsigned push(unsigned Opc, MVT Ty, std::vector<OpRef> &&Ops) {
681  NodeTemplate Res;
682  Res.Opc = Opc;
683  Res.Ty = Ty;
684  Res.Ops = Ops;
685  return push(Res);
686  }
687  bool empty() const { return List.empty(); }
688  unsigned size() const { return List.size(); }
689  unsigned top() const { return size()-1; }
690  const NodeTemplate &operator[](unsigned I) const { return List[I]; }
691  unsigned reset(unsigned NewTop) {
692  List.resize(NewTop+1);
693  return NewTop;
694  }
695 
696  using BaseType = std::vector<NodeTemplate>;
697  BaseType::iterator begin() { return List.begin(); }
698  BaseType::iterator end() { return List.end(); }
699  BaseType::const_iterator begin() const { return List.begin(); }
700  BaseType::const_iterator end() const { return List.end(); }
701 
702  BaseType List;
703 
705  void print(raw_ostream &OS, const SelectionDAG &G) const;
706 };
707 } // namespace
708 
709 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
710 void OpRef::print(raw_ostream &OS, const SelectionDAG &G) const {
711  if (isValue()) {
712  OpV.getNode()->print(OS, &G);
713  return;
714  }
715  if (OpN & Invalid) {
716  OS << "invalid";
717  return;
718  }
719  if (OpN & Undef) {
720  OS << "undef";
721  return;
722  }
723  if ((OpN & Whole) != Whole) {
724  assert((OpN & Whole) == LoHalf || (OpN & Whole) == HiHalf);
725  if (OpN & LoHalf)
726  OS << "lo ";
727  else
728  OS << "hi ";
729  }
730  OS << '#' << SignExtend32(OpN & Index, IndexBits);
731 }
732 
733 void NodeTemplate::print(raw_ostream &OS, const SelectionDAG &G) const {
734  const TargetInstrInfo &TII = *G.getSubtarget().getInstrInfo();
735  OS << format("%8s", EVT(Ty).getEVTString().c_str()) << " "
736  << TII.getName(Opc);
737  bool Comma = false;
738  for (const auto &R : Ops) {
739  if (Comma)
740  OS << ',';
741  Comma = true;
742  OS << ' ';
743  R.print(OS, G);
744  }
745 }
746 
747 void ResultStack::print(raw_ostream &OS, const SelectionDAG &G) const {
748  OS << "Input node:\n";
749 #ifndef NDEBUG
750  InpNode->dumpr(&G);
751 #endif
752  OS << "Result templates:\n";
753  for (unsigned I = 0, E = List.size(); I != E; ++I) {
754  OS << '[' << I << "] ";
755  List[I].print(OS, G);
756  OS << '\n';
757  }
758 }
759 #endif
760 
761 namespace {
762 struct ShuffleMask {
763  ShuffleMask(ArrayRef<int> M) : Mask(M) {
764  for (unsigned I = 0, E = Mask.size(); I != E; ++I) {
765  int M = Mask[I];
766  if (M == -1)
767  continue;
768  MinSrc = (MinSrc == -1) ? M : std::min(MinSrc, M);
769  MaxSrc = (MaxSrc == -1) ? M : std::max(MaxSrc, M);
770  }
771  }
772 
774  int MinSrc = -1, MaxSrc = -1;
775 
776  ShuffleMask lo() const {
777  size_t H = Mask.size()/2;
778  return ShuffleMask(Mask.take_front(H));
779  }
780  ShuffleMask hi() const {
781  size_t H = Mask.size()/2;
782  return ShuffleMask(Mask.take_back(H));
783  }
784 
785  void print(raw_ostream &OS) const {
786  OS << "MinSrc:" << MinSrc << ", MaxSrc:" << MaxSrc << " {";
787  for (int M : Mask)
788  OS << ' ' << M;
789  OS << " }";
790  }
791 };
792 
794 raw_ostream &operator<<(raw_ostream &OS, const ShuffleMask &SM) {
795  SM.print(OS);
796  return OS;
797 }
798 } // namespace
799 
800 // --------------------------------------------------------------------
801 // The HvxSelector class.
802 
804  return static_cast<const HexagonTargetLowering&>(G.getTargetLoweringInfo());
805 }
807  return static_cast<const HexagonSubtarget&>(G.getSubtarget());
808 }
809 
810 namespace llvm {
811  struct HvxSelector {
816  const unsigned HwLen;
817 
820  HST(getHexagonSubtarget(G)), HwLen(HST.getVectorLength()) {}
821 
822  MVT getSingleVT(MVT ElemTy) const {
823  unsigned NumElems = HwLen / (ElemTy.getSizeInBits()/8);
824  return MVT::getVectorVT(ElemTy, NumElems);
825  }
826 
827  MVT getPairVT(MVT ElemTy) const {
828  unsigned NumElems = (2*HwLen) / (ElemTy.getSizeInBits()/8);
829  return MVT::getVectorVT(ElemTy, NumElems);
830  }
831 
832  void selectShuffle(SDNode *N);
833  void selectRor(SDNode *N);
834  void selectVAlign(SDNode *N);
835 
836  private:
837  void select(SDNode *ISelN);
838  void materialize(const ResultStack &Results);
839 
840  SDValue getVectorConstant(ArrayRef<uint8_t> Data, const SDLoc &dl);
841 
842  enum : unsigned {
843  None,
844  PackMux,
845  };
846  OpRef concat(OpRef Va, OpRef Vb, ResultStack &Results);
847  OpRef packs(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
848  MutableArrayRef<int> NewMask, unsigned Options = None);
849  OpRef packp(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results,
850  MutableArrayRef<int> NewMask);
851  OpRef vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
852  ResultStack &Results);
853  OpRef vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
854  ResultStack &Results);
855 
856  OpRef shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results);
857  OpRef shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
858  OpRef shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results);
859  OpRef shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
860 
861  OpRef butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results);
862  OpRef contracting(ShuffleMask SM, OpRef Va, OpRef Vb, ResultStack &Results);
863  OpRef expanding(ShuffleMask SM, OpRef Va, ResultStack &Results);
864  OpRef perfect(ShuffleMask SM, OpRef Va, ResultStack &Results);
865 
866  bool selectVectorConstants(SDNode *N);
867  bool scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl, MVT ResTy,
868  SDValue Va, SDValue Vb, SDNode *N);
869 
870  };
871 }
872 
874  MutableArrayRef<int> MaskR) {
875  unsigned VecLen = Mask.size();
876  assert(MaskL.size() == VecLen && MaskR.size() == VecLen);
877  for (unsigned I = 0; I != VecLen; ++I) {
878  int M = Mask[I];
879  if (M < 0) {
880  MaskL[I] = MaskR[I] = -1;
881  } else if (unsigned(M) < VecLen) {
882  MaskL[I] = M;
883  MaskR[I] = -1;
884  } else {
885  MaskL[I] = -1;
886  MaskR[I] = M-VecLen;
887  }
888  }
889 }
890 
891 static std::pair<int,unsigned> findStrip(ArrayRef<int> A, int Inc,
892  unsigned MaxLen) {
893  assert(A.size() > 0 && A.size() >= MaxLen);
894  int F = A[0];
895  int E = F;
896  for (unsigned I = 1; I != MaxLen; ++I) {
897  if (A[I] - E != Inc)
898  return { F, I };
899  E = A[I];
900  }
901  return { F, MaxLen };
902 }
903 
904 static bool isUndef(ArrayRef<int> Mask) {
905  for (int Idx : Mask)
906  if (Idx != -1)
907  return false;
908  return true;
909 }
910 
912  for (int I = 0, E = Mask.size(); I != E; ++I) {
913  int M = Mask[I];
914  if (M >= 0 && M != I)
915  return false;
916  }
917  return true;
918 }
919 
921  // Check by adding all numbers only works if there is no overflow.
922  assert(Mask.size() < 0x00007FFF && "Sanity failure");
923  int Sum = 0;
924  for (int Idx : Mask) {
925  if (Idx == -1)
926  return false;
927  Sum += Idx;
928  }
929  int N = Mask.size();
930  return 2*Sum == N*(N-1);
931 }
932 
933 bool HvxSelector::selectVectorConstants(SDNode *N) {
934  // Constant vectors are generated as loads from constant pools or as
935  // splats of a constant value. Since they are generated during the
936  // selection process, the main selection algorithm is not aware of them.
937  // Select them directly here.
939  SetVector<SDNode*> WorkQ;
940 
941  // The DAG can change (due to CSE) during selection, so cache all the
942  // unselected nodes first to avoid traversing a mutating DAG.
943  WorkQ.insert(N);
944  for (unsigned i = 0; i != WorkQ.size(); ++i) {
945  SDNode *W = WorkQ[i];
946  if (!W->isMachineOpcode() && W->getOpcode() == HexagonISD::ISEL)
947  Nodes.push_back(W);
948  for (unsigned j = 0, f = W->getNumOperands(); j != f; ++j)
949  WorkQ.insert(W->getOperand(j).getNode());
950  }
951 
952  for (SDNode *L : Nodes)
953  select(L);
954 
955  return !Nodes.empty();
956 }
957 
958 void HvxSelector::materialize(const ResultStack &Results) {
959  DEBUG_WITH_TYPE("isel", {
960  dbgs() << "Materializing\n";
961  Results.print(dbgs(), DAG);
962  });
963  if (Results.empty())
964  return;
965  const SDLoc &dl(Results.InpNode);
966  std::vector<SDValue> Output;
967 
968  for (unsigned I = 0, E = Results.size(); I != E; ++I) {
969  const NodeTemplate &Node = Results[I];
970  std::vector<SDValue> Ops;
971  for (const OpRef &R : Node.Ops) {
972  assert(R.isValid());
973  if (R.isValue()) {
974  Ops.push_back(R.OpV);
975  continue;
976  }
977  if (R.OpN & OpRef::Undef) {
979  Ops.push_back(ISel.selectUndef(dl, MVT(SVT)));
980  continue;
981  }
982  // R is an index of a result.
983  unsigned Part = R.OpN & OpRef::Whole;
984  int Idx = SignExtend32(R.OpN & OpRef::Index, OpRef::IndexBits);
985  if (Idx < 0)
986  Idx += I;
987  assert(Idx >= 0 && unsigned(Idx) < Output.size());
988  SDValue Op = Output[Idx];
989  MVT OpTy = Op.getValueType().getSimpleVT();
990  if (Part != OpRef::Whole) {
991  assert(Part == OpRef::LoHalf || Part == OpRef::HiHalf);
992  MVT HalfTy = MVT::getVectorVT(OpTy.getVectorElementType(),
993  OpTy.getVectorNumElements()/2);
994  unsigned Sub = (Part == OpRef::LoHalf) ? Hexagon::vsub_lo
995  : Hexagon::vsub_hi;
996  Op = DAG.getTargetExtractSubreg(Sub, dl, HalfTy, Op);
997  }
998  Ops.push_back(Op);
999  } // for (Node : Results)
1000 
1001  assert(Node.Ty != MVT::Other);
1002  SDNode *ResN = (Node.Opc == TargetOpcode::COPY)
1003  ? Ops.front().getNode()
1004  : DAG.getMachineNode(Node.Opc, dl, Node.Ty, Ops);
1005  Output.push_back(SDValue(ResN, 0));
1006  }
1007 
1008  SDNode *OutN = Output.back().getNode();
1009  SDNode *InpN = Results.InpNode;
1010  DEBUG_WITH_TYPE("isel", {
1011  dbgs() << "Generated node:\n";
1012  OutN->dumpr(&DAG);
1013  });
1014 
1015  ISel.ReplaceNode(InpN, OutN);
1016  selectVectorConstants(OutN);
1017  DAG.RemoveDeadNodes();
1018 }
1019 
1020 OpRef HvxSelector::concat(OpRef Lo, OpRef Hi, ResultStack &Results) {
1021  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1022  const SDLoc &dl(Results.InpNode);
1023  Results.push(TargetOpcode::REG_SEQUENCE, getPairVT(MVT::i8), {
1024  DAG.getTargetConstant(Hexagon::HvxWRRegClassID, dl, MVT::i32),
1025  Lo, DAG.getTargetConstant(Hexagon::vsub_lo, dl, MVT::i32),
1026  Hi, DAG.getTargetConstant(Hexagon::vsub_hi, dl, MVT::i32),
1027  });
1028  return OpRef::res(Results.top());
1029 }
1030 
1031 // Va, Vb are single vectors, SM can be arbitrarily long.
1032 OpRef HvxSelector::packs(ShuffleMask SM, OpRef Va, OpRef Vb,
1033  ResultStack &Results, MutableArrayRef<int> NewMask,
1034  unsigned Options) {
1035  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1036  if (!Va.isValid() || !Vb.isValid())
1037  return OpRef::fail();
1038 
1039  int VecLen = SM.Mask.size();
1040  MVT Ty = getSingleVT(MVT::i8);
1041 
1042  auto IsExtSubvector = [] (ShuffleMask M) {
1043  assert(M.MinSrc >= 0 && M.MaxSrc >= 0);
1044  for (int I = 0, E = M.Mask.size(); I != E; ++I) {
1045  if (M.Mask[I] >= 0 && M.Mask[I]-I != M.MinSrc)
1046  return false;
1047  }
1048  return true;
1049  };
1050 
1051  if (SM.MaxSrc - SM.MinSrc < int(HwLen)) {
1052  if (SM.MinSrc == 0 || SM.MinSrc == int(HwLen) || !IsExtSubvector(SM)) {
1053  // If the mask picks elements from only one of the operands, return
1054  // that operand, and update the mask to use index 0 to refer to the
1055  // first element of that operand.
1056  // If the mask extracts a subvector, it will be handled below, so
1057  // skip it here.
1058  if (SM.MaxSrc < int(HwLen)) {
1059  memcpy(NewMask.data(), SM.Mask.data(), sizeof(int)*VecLen);
1060  return Va;
1061  }
1062  if (SM.MinSrc >= int(HwLen)) {
1063  for (int I = 0; I != VecLen; ++I) {
1064  int M = SM.Mask[I];
1065  if (M != -1)
1066  M -= HwLen;
1067  NewMask[I] = M;
1068  }
1069  return Vb;
1070  }
1071  }
1072  int MinSrc = SM.MinSrc;
1073  if (SM.MaxSrc < int(HwLen)) {
1074  Vb = Va;
1075  } else if (SM.MinSrc > int(HwLen)) {
1076  Va = Vb;
1077  MinSrc = SM.MinSrc - HwLen;
1078  }
1079  const SDLoc &dl(Results.InpNode);
1080  if (isUInt<3>(MinSrc) || isUInt<3>(HwLen-MinSrc)) {
1081  bool IsRight = isUInt<3>(MinSrc); // Right align.
1082  SDValue S = DAG.getTargetConstant(IsRight ? MinSrc : HwLen-MinSrc,
1083  dl, MVT::i32);
1084  unsigned Opc = IsRight ? Hexagon::V6_valignbi
1085  : Hexagon::V6_vlalignbi;
1086  Results.push(Opc, Ty, {Vb, Va, S});
1087  } else {
1088  SDValue S = DAG.getTargetConstant(MinSrc, dl, MVT::i32);
1089  Results.push(Hexagon::A2_tfrsi, MVT::i32, {S});
1090  unsigned Top = Results.top();
1091  Results.push(Hexagon::V6_valignb, Ty, {Vb, Va, OpRef::res(Top)});
1092  }
1093  for (int I = 0; I != VecLen; ++I) {
1094  int M = SM.Mask[I];
1095  if (M != -1)
1096  M -= SM.MinSrc;
1097  NewMask[I] = M;
1098  }
1099  return OpRef::res(Results.top());
1100  }
1101 
1102  if (Options & PackMux) {
1103  // If elements picked from Va and Vb have all different (source) indexes
1104  // (relative to the start of the argument), do a mux, and update the mask.
1105  BitVector Picked(HwLen);
1106  SmallVector<uint8_t,128> MuxBytes(HwLen);
1107  bool CanMux = true;
1108  for (int I = 0; I != VecLen; ++I) {
1109  int M = SM.Mask[I];
1110  if (M == -1)
1111  continue;
1112  if (M >= int(HwLen))
1113  M -= HwLen;
1114  else
1115  MuxBytes[M] = 0xFF;
1116  if (Picked[M]) {
1117  CanMux = false;
1118  break;
1119  }
1120  NewMask[I] = M;
1121  }
1122  if (CanMux)
1123  return vmuxs(MuxBytes, Va, Vb, Results);
1124  }
1125 
1126  return OpRef::fail();
1127 }
1128 
1129 OpRef HvxSelector::packp(ShuffleMask SM, OpRef Va, OpRef Vb,
1130  ResultStack &Results, MutableArrayRef<int> NewMask) {
1131  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1132  unsigned HalfMask = 0;
1133  unsigned LogHw = Log2_32(HwLen);
1134  for (int M : SM.Mask) {
1135  if (M == -1)
1136  continue;
1137  HalfMask |= (1u << (M >> LogHw));
1138  }
1139 
1140  if (HalfMask == 0)
1141  return OpRef::undef(getPairVT(MVT::i8));
1142 
1143  // If more than two halves are used, bail.
1144  // TODO: be more aggressive here?
1145  if (countPopulation(HalfMask) > 2)
1146  return OpRef::fail();
1147 
1148  MVT HalfTy = getSingleVT(MVT::i8);
1149 
1150  OpRef Inp[2] = { Va, Vb };
1151  OpRef Out[2] = { OpRef::undef(HalfTy), OpRef::undef(HalfTy) };
1152 
1153  uint8_t HalfIdx[4] = { 0xFF, 0xFF, 0xFF, 0xFF };
1154  unsigned Idx = 0;
1155  for (unsigned I = 0; I != 4; ++I) {
1156  if ((HalfMask & (1u << I)) == 0)
1157  continue;
1158  assert(Idx < 2);
1159  OpRef Op = Inp[I/2];
1160  Out[Idx] = (I & 1) ? OpRef::hi(Op) : OpRef::lo(Op);
1161  HalfIdx[I] = Idx++;
1162  }
1163 
1164  int VecLen = SM.Mask.size();
1165  for (int I = 0; I != VecLen; ++I) {
1166  int M = SM.Mask[I];
1167  if (M >= 0) {
1168  uint8_t Idx = HalfIdx[M >> LogHw];
1169  assert(Idx == 0 || Idx == 1);
1170  M = (M & (HwLen-1)) + HwLen*Idx;
1171  }
1172  NewMask[I] = M;
1173  }
1174 
1175  return concat(Out[0], Out[1], Results);
1176 }
1177 
1178 OpRef HvxSelector::vmuxs(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1179  ResultStack &Results) {
1180  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1181  MVT ByteTy = getSingleVT(MVT::i8);
1182  MVT BoolTy = MVT::getVectorVT(MVT::i1, HwLen);
1183  const SDLoc &dl(Results.InpNode);
1184  SDValue B = getVectorConstant(Bytes, dl);
1185  Results.push(Hexagon::V6_vd0, ByteTy, {});
1186  Results.push(Hexagon::V6_veqb, BoolTy, {OpRef(B), OpRef::res(-1)});
1187  Results.push(Hexagon::V6_vmux, ByteTy, {OpRef::res(-1), Vb, Va});
1188  return OpRef::res(Results.top());
1189 }
1190 
1191 OpRef HvxSelector::vmuxp(ArrayRef<uint8_t> Bytes, OpRef Va, OpRef Vb,
1192  ResultStack &Results) {
1193  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1194  size_t S = Bytes.size() / 2;
1195  OpRef L = vmuxs(Bytes.take_front(S), OpRef::lo(Va), OpRef::lo(Vb), Results);
1196  OpRef H = vmuxs(Bytes.drop_front(S), OpRef::hi(Va), OpRef::hi(Vb), Results);
1197  return concat(L, H, Results);
1198 }
1199 
1200 OpRef HvxSelector::shuffs1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1201  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1202  unsigned VecLen = SM.Mask.size();
1203  assert(HwLen == VecLen);
1204  (void)VecLen;
1205  assert(all_of(SM.Mask, [this](int M) { return M == -1 || M < int(HwLen); }));
1206 
1207  if (isIdentity(SM.Mask))
1208  return Va;
1209  if (isUndef(SM.Mask))
1210  return OpRef::undef(getSingleVT(MVT::i8));
1211 
1212  OpRef P = perfect(SM, Va, Results);
1213  if (P.isValid())
1214  return P;
1215  return butterfly(SM, Va, Results);
1216 }
1217 
1218 OpRef HvxSelector::shuffs2(ShuffleMask SM, OpRef Va, OpRef Vb,
1219  ResultStack &Results) {
1220  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1221  if (isUndef(SM.Mask))
1222  return OpRef::undef(getSingleVT(MVT::i8));
1223 
1224  OpRef C = contracting(SM, Va, Vb, Results);
1225  if (C.isValid())
1226  return C;
1227 
1228  int VecLen = SM.Mask.size();
1229  SmallVector<int,128> NewMask(VecLen);
1230  OpRef P = packs(SM, Va, Vb, Results, NewMask);
1231  if (P.isValid())
1232  return shuffs1(ShuffleMask(NewMask), P, Results);
1233 
1234  SmallVector<int,128> MaskL(VecLen), MaskR(VecLen);
1235  splitMask(SM.Mask, MaskL, MaskR);
1236 
1237  OpRef L = shuffs1(ShuffleMask(MaskL), Va, Results);
1238  OpRef R = shuffs1(ShuffleMask(MaskR), Vb, Results);
1239  if (!L.isValid() || !R.isValid())
1240  return OpRef::fail();
1241 
1242  SmallVector<uint8_t,128> Bytes(VecLen);
1243  for (int I = 0; I != VecLen; ++I) {
1244  if (MaskL[I] != -1)
1245  Bytes[I] = 0xFF;
1246  }
1247  return vmuxs(Bytes, L, R, Results);
1248 }
1249 
1250 OpRef HvxSelector::shuffp1(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1251  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1252  int VecLen = SM.Mask.size();
1253 
1254  if (isIdentity(SM.Mask))
1255  return Va;
1256  if (isUndef(SM.Mask))
1257  return OpRef::undef(getPairVT(MVT::i8));
1258 
1259  SmallVector<int,128> PackedMask(VecLen);
1260  OpRef P = packs(SM, OpRef::lo(Va), OpRef::hi(Va), Results, PackedMask);
1261  if (P.isValid()) {
1262  ShuffleMask PM(PackedMask);
1263  OpRef E = expanding(PM, P, Results);
1264  if (E.isValid())
1265  return E;
1266 
1267  OpRef L = shuffs1(PM.lo(), P, Results);
1268  OpRef H = shuffs1(PM.hi(), P, Results);
1269  if (L.isValid() && H.isValid())
1270  return concat(L, H, Results);
1271  }
1272 
1273  OpRef R = perfect(SM, Va, Results);
1274  if (R.isValid())
1275  return R;
1276  // TODO commute the mask and try the opposite order of the halves.
1277 
1278  OpRef L = shuffs2(SM.lo(), OpRef::lo(Va), OpRef::hi(Va), Results);
1279  OpRef H = shuffs2(SM.hi(), OpRef::lo(Va), OpRef::hi(Va), Results);
1280  if (L.isValid() && H.isValid())
1281  return concat(L, H, Results);
1282 
1283  return OpRef::fail();
1284 }
1285 
1286 OpRef HvxSelector::shuffp2(ShuffleMask SM, OpRef Va, OpRef Vb,
1287  ResultStack &Results) {
1288  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1289  if (isUndef(SM.Mask))
1290  return OpRef::undef(getPairVT(MVT::i8));
1291 
1292  int VecLen = SM.Mask.size();
1293  SmallVector<int,256> PackedMask(VecLen);
1294  OpRef P = packp(SM, Va, Vb, Results, PackedMask);
1295  if (P.isValid())
1296  return shuffp1(ShuffleMask(PackedMask), P, Results);
1297 
1298  SmallVector<int,256> MaskL(VecLen), MaskR(VecLen);
1299  splitMask(SM.Mask, MaskL, MaskR);
1300 
1301  OpRef L = shuffp1(ShuffleMask(MaskL), Va, Results);
1302  OpRef R = shuffp1(ShuffleMask(MaskR), Vb, Results);
1303  if (!L.isValid() || !R.isValid())
1304  return OpRef::fail();
1305 
1306  // Mux the results.
1307  SmallVector<uint8_t,256> Bytes(VecLen);
1308  for (int I = 0; I != VecLen; ++I) {
1309  if (MaskL[I] != -1)
1310  Bytes[I] = 0xFF;
1311  }
1312  return vmuxp(Bytes, L, R, Results);
1313 }
1314 
1315 namespace {
1316  struct Deleter : public SelectionDAG::DAGNodeDeletedListener {
1317  template <typename T>
1318  Deleter(SelectionDAG &D, T &C)
1319  : SelectionDAG::DAGNodeDeletedListener(D, [&C] (SDNode *N, SDNode *E) {
1320  C.erase(N);
1321  }) {}
1322  };
1323 
1324  template <typename T>
1325  struct NullifyingVector : public T {
1327  NullifyingVector(T &&V) : T(V) {
1328  for (unsigned i = 0, e = T::size(); i != e; ++i) {
1329  SDNode *&N = T::operator[](i);
1330  Refs[N] = &N;
1331  }
1332  }
1333  void erase(SDNode *N) {
1334  auto F = Refs.find(N);
1335  if (F != Refs.end())
1336  *F->second = nullptr;
1337  }
1338  };
1339 }
1340 
1341 void HvxSelector::select(SDNode *ISelN) {
1342  // What's important here is to select the right set of nodes. The main
1343  // selection algorithm loops over nodes in a topological order, i.e. users
1344  // are visited before their operands.
1345  //
1346  // It is an error to have an unselected node with a selected operand, and
1347  // there is an assertion in the main selector code to enforce that.
1348  //
1349  // Such a situation could occur if we selected a node, which is both a
1350  // subnode of ISelN, and a subnode of an unrelated (and yet unselected)
1351  // node in the DAG.
1352  assert(ISelN->getOpcode() == HexagonISD::ISEL);
1353  SDNode *N0 = ISelN->getOperand(0).getNode();
1354  if (N0->isMachineOpcode()) {
1355  ISel.ReplaceNode(ISelN, N0);
1356  return;
1357  }
1358 
1359  // There could have been nodes created (i.e. inserted into the DAG)
1360  // that are now dead. Remove them, in case they use any of the nodes
1361  // to select (and make them look shared).
1362  DAG.RemoveDeadNodes();
1363 
1364  SetVector<SDNode*> SubNodes, TmpQ;
1365  std::map<SDNode*,unsigned> NumOps;
1366 
1367  // Don't want to select N0 if it's shared with another node, except if
1368  // it's shared with other ISELs.
1369  auto IsISelN = [](SDNode *T) { return T->getOpcode() == HexagonISD::ISEL; };
1370  if (llvm::all_of(N0->uses(), IsISelN))
1371  SubNodes.insert(N0);
1372 
1373  auto InSubNodes = [&SubNodes](SDNode *T) { return SubNodes.count(T); };
1374  for (unsigned I = 0; I != SubNodes.size(); ++I) {
1375  SDNode *S = SubNodes[I];
1376  unsigned OpN = 0;
1377  // Only add subnodes that are only reachable from N0.
1378  for (SDValue Op : S->ops()) {
1379  SDNode *O = Op.getNode();
1380  if (llvm::all_of(O->uses(), InSubNodes)) {
1381  SubNodes.insert(O);
1382  ++OpN;
1383  }
1384  }
1385  NumOps.insert({S, OpN});
1386  if (OpN == 0)
1387  TmpQ.insert(S);
1388  }
1389 
1390  for (unsigned I = 0; I != TmpQ.size(); ++I) {
1391  SDNode *S = TmpQ[I];
1392  for (SDNode *U : S->uses()) {
1393  if (U == ISelN)
1394  continue;
1395  auto F = NumOps.find(U);
1396  assert(F != NumOps.end());
1397  if (F->second > 0 && !--F->second)
1398  TmpQ.insert(F->first);
1399  }
1400  }
1401 
1402  // Remove the marker.
1403  ISel.ReplaceNode(ISelN, N0);
1404 
1405  assert(SubNodes.size() == TmpQ.size());
1406  NullifyingVector<decltype(TmpQ)::vector_type> Queue(TmpQ.takeVector());
1407 
1408  Deleter DUQ(DAG, Queue);
1409  for (SDNode *S : reverse(Queue)) {
1410  if (S == nullptr)
1411  continue;
1412  DEBUG_WITH_TYPE("isel", {dbgs() << "HVX selecting: "; S->dump(&DAG);});
1413  ISel.Select(S);
1414  }
1415 }
1416 
1417 bool HvxSelector::scalarizeShuffle(ArrayRef<int> Mask, const SDLoc &dl,
1418  MVT ResTy, SDValue Va, SDValue Vb,
1419  SDNode *N) {
1420  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1421  MVT ElemTy = ResTy.getVectorElementType();
1422  assert(ElemTy == MVT::i8);
1423  unsigned VecLen = Mask.size();
1424  bool HavePairs = (2*HwLen == VecLen);
1425  MVT SingleTy = getSingleVT(MVT::i8);
1426 
1427  // The prior attempts to handle this shuffle may have left a bunch of
1428  // dead nodes in the DAG (such as constants). These nodes will be added
1429  // at the end of DAG's node list, which at that point had already been
1430  // sorted topologically. In the main selection loop, the node list is
1431  // traversed backwards from the root node, which means that any new
1432  // nodes (from the end of the list) will not be visited.
1433  // Scalarization will replace the shuffle node with the scalarized
1434  // expression, and if that expression reused any if the leftoever (dead)
1435  // nodes, these nodes would not be selected (since the "local" selection
1436  // only visits nodes that are not in AllNodes).
1437  // To avoid this issue, remove all dead nodes from the DAG now.
1438 // DAG.RemoveDeadNodes();
1439 
1441  LLVMContext &Ctx = *DAG.getContext();
1442  MVT LegalTy = Lower.getTypeToTransformTo(Ctx, ElemTy).getSimpleVT();
1443  for (int I : Mask) {
1444  if (I < 0) {
1445  Ops.push_back(ISel.selectUndef(dl, LegalTy));
1446  continue;
1447  }
1448  SDValue Vec;
1449  unsigned M = I;
1450  if (M < VecLen) {
1451  Vec = Va;
1452  } else {
1453  Vec = Vb;
1454  M -= VecLen;
1455  }
1456  if (HavePairs) {
1457  if (M < HwLen) {
1458  Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_lo, dl, SingleTy, Vec);
1459  } else {
1460  Vec = DAG.getTargetExtractSubreg(Hexagon::vsub_hi, dl, SingleTy, Vec);
1461  M -= HwLen;
1462  }
1463  }
1464  SDValue Idx = DAG.getConstant(M, dl, MVT::i32);
1465  SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, LegalTy, {Vec, Idx});
1466  SDValue L = Lower.LowerOperation(Ex, DAG);
1467  assert(L.getNode());
1468  Ops.push_back(L);
1469  }
1470 
1471  SDValue LV;
1472  if (2*HwLen == VecLen) {
1473  SDValue B0 = DAG.getBuildVector(SingleTy, dl, {Ops.data(), HwLen});
1474  SDValue L0 = Lower.LowerOperation(B0, DAG);
1475  SDValue B1 = DAG.getBuildVector(SingleTy, dl, {Ops.data()+HwLen, HwLen});
1477  // XXX CONCAT_VECTORS is legal for HVX vectors. Legalizing (lowering)
1478  // functions may expect to be called only for illegal operations, so
1479  // make sure that they are not called for legal ones. Develop a better
1480  // mechanism for dealing with this.
1481  LV = DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, {L0, L1});
1482  } else {
1483  SDValue BV = DAG.getBuildVector(ResTy, dl, Ops);
1484  LV = Lower.LowerOperation(BV, DAG);
1485  }
1486 
1487  assert(!N->use_empty());
1488  SDValue IS = DAG.getNode(HexagonISD::ISEL, dl, ResTy, LV);
1489  ISel.ReplaceNode(N, IS.getNode());
1490  select(IS.getNode());
1491  DAG.RemoveDeadNodes();
1492  return true;
1493 }
1494 
1495 OpRef HvxSelector::contracting(ShuffleMask SM, OpRef Va, OpRef Vb,
1496  ResultStack &Results) {
1497  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1498  if (!Va.isValid() || !Vb.isValid())
1499  return OpRef::fail();
1500 
1501  // Contracting shuffles, i.e. instructions that always discard some bytes
1502  // from the operand vectors.
1503  //
1504  // V6_vshuff{e,o}b
1505  // V6_vdealb4w
1506  // V6_vpack{e,o}{b,h}
1507 
1508  int VecLen = SM.Mask.size();
1509  std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
1510  MVT ResTy = getSingleVT(MVT::i8);
1511 
1512  // The following shuffles only work for bytes and halfwords. This requires
1513  // the strip length to be 1 or 2.
1514  if (Strip.second != 1 && Strip.second != 2)
1515  return OpRef::fail();
1516 
1517  // The patterns for the shuffles, in terms of the starting offsets of the
1518  // consecutive strips (L = length of the strip, N = VecLen):
1519  //
1520  // vpacke: 0, 2L, 4L ... N+0, N+2L, N+4L ... L = 1 or 2
1521  // vpacko: L, 3L, 5L ... N+L, N+3L, N+5L ... L = 1 or 2
1522  //
1523  // vshuffe: 0, N+0, 2L, N+2L, 4L ... L = 1 or 2
1524  // vshuffo: L, N+L, 3L, N+3L, 5L ... L = 1 or 2
1525  //
1526  // vdealb4w: 0, 4, 8 ... 2, 6, 10 ... N+0, N+4, N+8 ... N+2, N+6, N+10 ...
1527 
1528  // The value of the element in the mask following the strip will decide
1529  // what kind of a shuffle this can be.
1530  int NextInMask = SM.Mask[Strip.second];
1531 
1532  // Check if NextInMask could be 2L, 3L or 4, i.e. if it could be a mask
1533  // for vpack or vdealb4w. VecLen > 4, so NextInMask for vdealb4w would
1534  // satisfy this.
1535  if (NextInMask < VecLen) {
1536  // vpack{e,o} or vdealb4w
1537  if (Strip.first == 0 && Strip.second == 1 && NextInMask == 4) {
1538  int N = VecLen;
1539  // Check if this is vdealb4w (L=1).
1540  for (int I = 0; I != N/4; ++I)
1541  if (SM.Mask[I] != 4*I)
1542  return OpRef::fail();
1543  for (int I = 0; I != N/4; ++I)
1544  if (SM.Mask[I+N/4] != 2 + 4*I)
1545  return OpRef::fail();
1546  for (int I = 0; I != N/4; ++I)
1547  if (SM.Mask[I+N/2] != N + 4*I)
1548  return OpRef::fail();
1549  for (int I = 0; I != N/4; ++I)
1550  if (SM.Mask[I+3*N/4] != N+2 + 4*I)
1551  return OpRef::fail();
1552  // Matched mask for vdealb4w.
1553  Results.push(Hexagon::V6_vdealb4w, ResTy, {Vb, Va});
1554  return OpRef::res(Results.top());
1555  }
1556 
1557  // Check if this is vpack{e,o}.
1558  int N = VecLen;
1559  int L = Strip.second;
1560  // Check if the first strip starts at 0 or at L.
1561  if (Strip.first != 0 && Strip.first != L)
1562  return OpRef::fail();
1563  // Examine the rest of the mask.
1564  for (int I = L; I < N; I += L) {
1565  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1566  // Check whether the mask element at the beginning of each strip
1567  // increases by 2L each time.
1568  if (S.first - Strip.first != 2*I)
1569  return OpRef::fail();
1570  // Check whether each strip is of the same length.
1571  if (S.second != unsigned(L))
1572  return OpRef::fail();
1573  }
1574 
1575  // Strip.first == 0 => vpacke
1576  // Strip.first == L => vpacko
1577  assert(Strip.first == 0 || Strip.first == L);
1578  using namespace Hexagon;
1579  NodeTemplate Res;
1580  Res.Opc = Strip.second == 1 // Number of bytes.
1581  ? (Strip.first == 0 ? V6_vpackeb : V6_vpackob)
1582  : (Strip.first == 0 ? V6_vpackeh : V6_vpackoh);
1583  Res.Ty = ResTy;
1584  Res.Ops = { Vb, Va };
1585  Results.push(Res);
1586  return OpRef::res(Results.top());
1587  }
1588 
1589  // Check if this is vshuff{e,o}.
1590  int N = VecLen;
1591  int L = Strip.second;
1592  std::pair<int,unsigned> PrevS = Strip;
1593  bool Flip = false;
1594  for (int I = L; I < N; I += L) {
1595  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1596  if (S.second != PrevS.second)
1597  return OpRef::fail();
1598  int Diff = Flip ? PrevS.first - S.first + 2*L
1599  : S.first - PrevS.first;
1600  if (Diff != N)
1601  return OpRef::fail();
1602  Flip ^= true;
1603  PrevS = S;
1604  }
1605  // Strip.first == 0 => vshuffe
1606  // Strip.first == L => vshuffo
1607  assert(Strip.first == 0 || Strip.first == L);
1608  using namespace Hexagon;
1609  NodeTemplate Res;
1610  Res.Opc = Strip.second == 1 // Number of bytes.
1611  ? (Strip.first == 0 ? V6_vshuffeb : V6_vshuffob)
1612  : (Strip.first == 0 ? V6_vshufeh : V6_vshufoh);
1613  Res.Ty = ResTy;
1614  Res.Ops = { Vb, Va };
1615  Results.push(Res);
1616  return OpRef::res(Results.top());
1617 }
1618 
1619 OpRef HvxSelector::expanding(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1620  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1621  // Expanding shuffles (using all elements and inserting into larger vector):
1622  //
1623  // V6_vunpacku{b,h} [*]
1624  //
1625  // [*] Only if the upper elements (filled with 0s) are "don't care" in Mask.
1626  //
1627  // Note: V6_vunpacko{b,h} are or-ing the high byte/half in the result, so
1628  // they are not shuffles.
1629  //
1630  // The argument is a single vector.
1631 
1632  int VecLen = SM.Mask.size();
1633  assert(2*HwLen == unsigned(VecLen) && "Expecting vector-pair type");
1634 
1635  std::pair<int,unsigned> Strip = findStrip(SM.Mask, 1, VecLen);
1636 
1637  // The patterns for the unpacks, in terms of the starting offsets of the
1638  // consecutive strips (L = length of the strip, N = VecLen):
1639  //
1640  // vunpacku: 0, -1, L, -1, 2L, -1 ...
1641 
1642  if (Strip.first != 0)
1643  return OpRef::fail();
1644 
1645  // The vunpackus only handle byte and half-word.
1646  if (Strip.second != 1 && Strip.second != 2)
1647  return OpRef::fail();
1648 
1649  int N = VecLen;
1650  int L = Strip.second;
1651 
1652  // First, check the non-ignored strips.
1653  for (int I = 2*L; I < 2*N; I += 2*L) {
1654  auto S = findStrip(SM.Mask.drop_front(I), 1, N-I);
1655  if (S.second != unsigned(L))
1656  return OpRef::fail();
1657  if (2*S.first != I)
1658  return OpRef::fail();
1659  }
1660  // Check the -1s.
1661  for (int I = L; I < 2*N; I += 2*L) {
1662  auto S = findStrip(SM.Mask.drop_front(I), 0, N-I);
1663  if (S.first != -1 || S.second != unsigned(L))
1664  return OpRef::fail();
1665  }
1666 
1667  unsigned Opc = Strip.second == 1 ? Hexagon::V6_vunpackub
1668  : Hexagon::V6_vunpackuh;
1669  Results.push(Opc, getPairVT(MVT::i8), {Va});
1670  return OpRef::res(Results.top());
1671 }
1672 
1673 OpRef HvxSelector::perfect(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1674  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1675  // V6_vdeal{b,h}
1676  // V6_vshuff{b,h}
1677 
1678  // V6_vshufoe{b,h} those are quivalent to vshuffvdd(..,{1,2})
1679  // V6_vshuffvdd (V6_vshuff)
1680  // V6_dealvdd (V6_vdeal)
1681 
1682  int VecLen = SM.Mask.size();
1683  assert(isPowerOf2_32(VecLen) && Log2_32(VecLen) <= 8);
1684  unsigned LogLen = Log2_32(VecLen);
1685  unsigned HwLog = Log2_32(HwLen);
1686  // The result length must be the same as the length of a single vector,
1687  // or a vector pair.
1688  assert(LogLen == HwLog || LogLen == HwLog+1);
1689  bool HavePairs = LogLen == HwLog+1;
1690 
1691  if (!isPermutation(SM.Mask))
1692  return OpRef::fail();
1693 
1694  SmallVector<unsigned,8> Perm(LogLen);
1695 
1696  // Check if this could be a perfect shuffle, or a combination of perfect
1697  // shuffles.
1698  //
1699  // Consider this permutation (using hex digits to make the ASCII diagrams
1700  // easier to read):
1701  // { 0, 8, 1, 9, 2, A, 3, B, 4, C, 5, D, 6, E, 7, F }.
1702  // This is a "deal" operation: divide the input into two halves, and
1703  // create the output by picking elements by alternating between these two
1704  // halves:
1705  // 0 1 2 3 4 5 6 7 --> 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F [*]
1706  // 8 9 A B C D E F
1707  //
1708  // Aside from a few special explicit cases (V6_vdealb, etc.), HVX provides
1709  // a somwehat different mechanism that could be used to perform shuffle/
1710  // deal operations: a 2x2 transpose.
1711  // Consider the halves of inputs again, they can be interpreted as a 2x8
1712  // matrix. A 2x8 matrix can be looked at four 2x2 matrices concatenated
1713  // together. Now, when considering 2 elements at a time, it will be a 2x4
1714  // matrix (with elements 01, 23, 45, etc.), or two 2x2 matrices:
1715  // 01 23 45 67
1716  // 89 AB CD EF
1717  // With groups of 4, this will become a single 2x2 matrix, and so on.
1718  //
1719  // The 2x2 transpose instruction works by transposing each of the 2x2
1720  // matrices (or "sub-matrices"), given a specific group size. For example,
1721  // if the group size is 1 (i.e. each element is its own group), there
1722  // will be four transposes of the four 2x2 matrices that form the 2x8.
1723  // For example, with the inputs as above, the result will be:
1724  // 0 8 2 A 4 C 6 E
1725  // 1 9 3 B 5 D 7 F
1726  // Now, this result can be tranposed again, but with the group size of 2:
1727  // 08 19 4C 5D
1728  // 2A 3B 6E 7F
1729  // If we then transpose that result, but with the group size of 4, we get:
1730  // 0819 2A3B
1731  // 4C5D 6E7F
1732  // If we concatenate these two rows, it will be
1733  // 0 8 1 9 2 A 3 B 4 C 5 D 6 E 7 F
1734  // which is the same as the "deal" [*] above.
1735  //
1736  // In general, a "deal" of individual elements is a series of 2x2 transposes,
1737  // with changing group size. HVX has two instructions:
1738  // Vdd = V6_vdealvdd Vu, Vv, Rt
1739  // Vdd = V6_shufvdd Vu, Vv, Rt
1740  // that perform exactly that. The register Rt controls which transposes are
1741  // going to happen: a bit at position n (counting from 0) indicates that a
1742  // transpose with a group size of 2^n will take place. If multiple bits are
1743  // set, multiple transposes will happen: vdealvdd will perform them starting
1744  // with the largest group size, vshuffvdd will do them in the reverse order.
1745  //
1746  // The main observation is that each 2x2 transpose corresponds to swapping
1747  // columns of bits in the binary representation of the values.
1748  //
1749  // The numbers {3,2,1,0} and the log2 of the number of contiguous 1 bits
1750  // in a given column. The * denote the columns that will be swapped.
1751  // The transpose with the group size 2^n corresponds to swapping columns
1752  // 3 (the highest log) and log2(n):
1753  //
1754  // 3 2 1 0 0 2 1 3 0 2 3 1
1755  // * * * * * *
1756  // 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
1757  // 1 0 0 0 1 8 1 0 0 0 8 1 0 0 0 8 1 0 0 0
1758  // 2 0 0 1 0 2 0 0 1 0 1 0 0 0 1 1 0 0 0 1
1759  // 3 0 0 1 1 A 1 0 1 0 9 1 0 0 1 9 1 0 0 1
1760  // 4 0 1 0 0 4 0 1 0 0 4 0 1 0 0 2 0 0 1 0
1761  // 5 0 1 0 1 C 1 1 0 0 C 1 1 0 0 A 1 0 1 0
1762  // 6 0 1 1 0 6 0 1 1 0 5 0 1 0 1 3 0 0 1 1
1763  // 7 0 1 1 1 E 1 1 1 0 D 1 1 0 1 B 1 0 1 1
1764  // 8 1 0 0 0 1 0 0 0 1 2 0 0 1 0 4 0 1 0 0
1765  // 9 1 0 0 1 9 1 0 0 1 A 1 0 1 0 C 1 1 0 0
1766  // A 1 0 1 0 3 0 0 1 1 3 0 0 1 1 5 0 1 0 1
1767  // B 1 0 1 1 B 1 0 1 1 B 1 0 1 1 D 1 1 0 1
1768  // C 1 1 0 0 5 0 1 0 1 6 0 1 1 0 6 0 1 1 0
1769  // D 1 1 0 1 D 1 1 0 1 E 1 1 1 0 E 1 1 1 0
1770  // E 1 1 1 0 7 0 1 1 1 7 0 1 1 1 7 0 1 1 1
1771  // F 1 1 1 1 F 1 1 1 1 F 1 1 1 1 F 1 1 1 1
1772 
1773  // There is one special case that is not a perfect shuffle, but
1774  // can be turned into one easily: when the shuffle operates on
1775  // a vector pair, but the two vectors in the pair are swapped.
1776  // The code below that identifies perfect shuffles will reject
1777  // it, unless the order is reversed.
1778  SmallVector<int,128> MaskStorage(SM.Mask.begin(), SM.Mask.end());
1779  bool InvertedPair = false;
1780  if (HavePairs && SM.Mask[0] >= int(HwLen)) {
1781  for (int i = 0, e = SM.Mask.size(); i != e; ++i) {
1782  int M = SM.Mask[i];
1783  MaskStorage[i] = M >= int(HwLen) ? M-HwLen : M+HwLen;
1784  }
1785  InvertedPair = true;
1786  }
1787  ArrayRef<int> LocalMask(MaskStorage);
1788 
1789  auto XorPow2 = [] (ArrayRef<int> Mask, unsigned Num) {
1790  unsigned X = Mask[0] ^ Mask[Num/2];
1791  // Check that the first half has the X's bits clear.
1792  if ((Mask[0] & X) != 0)
1793  return 0u;
1794  for (unsigned I = 1; I != Num/2; ++I) {
1795  if (unsigned(Mask[I] ^ Mask[I+Num/2]) != X)
1796  return 0u;
1797  if ((Mask[I] & X) != 0)
1798  return 0u;
1799  }
1800  return X;
1801  };
1802 
1803  // Create a vector of log2's for each column: Perm[i] corresponds to
1804  // the i-th bit (lsb is 0).
1805  assert(VecLen > 2);
1806  for (unsigned I = VecLen; I >= 2; I >>= 1) {
1807  // Examine the initial segment of Mask of size I.
1808  unsigned X = XorPow2(LocalMask, I);
1809  if (!isPowerOf2_32(X))
1810  return OpRef::fail();
1811  // Check the other segments of Mask.
1812  for (int J = I; J < VecLen; J += I) {
1813  if (XorPow2(LocalMask.slice(J, I), I) != X)
1814  return OpRef::fail();
1815  }
1816  Perm[Log2_32(X)] = Log2_32(I)-1;
1817  }
1818 
1819  // Once we have Perm, represent it as cycles. Denote the maximum log2
1820  // (equal to log2(VecLen)-1) as M. The cycle containing M can then be
1821  // written as (M a1 a2 a3 ... an). That cycle can be broken up into
1822  // simple swaps as (M a1)(M a2)(M a3)...(M an), with the composition
1823  // order being from left to right. Any (contiguous) segment where the
1824  // values ai, ai+1...aj are either all increasing or all decreasing,
1825  // can be implemented via a single vshuffvdd/vdealvdd respectively.
1826  //
1827  // If there is a cycle (a1 a2 ... an) that does not involve M, it can
1828  // be written as (M an)(a1 a2 ... an)(M a1). The first two cycles can
1829  // then be folded to get (M a1 a2 ... an)(M a1), and the above procedure
1830  // can be used to generate a sequence of vshuffvdd/vdealvdd.
1831  //
1832  // Example:
1833  // Assume M = 4 and consider a permutation (0 1)(2 3). It can be written
1834  // as (4 0 1)(4 0) composed with (4 2 3)(4 2), or simply
1835  // (4 0 1)(4 0)(4 2 3)(4 2).
1836  // It can then be expanded into swaps as
1837  // (4 0)(4 1)(4 0)(4 2)(4 3)(4 2),
1838  // and broken up into "increasing" segments as
1839  // [(4 0)(4 1)] [(4 0)(4 2)(4 3)] [(4 2)].
1840  // This is equivalent to
1841  // (4 0 1)(4 0 2 3)(4 2),
1842  // which can be implemented as 3 vshufvdd instructions.
1843 
1844  using CycleType = SmallVector<unsigned,8>;
1845  std::set<CycleType> Cycles;
1846  std::set<unsigned> All;
1847 
1848  for (unsigned I : Perm)
1849  All.insert(I);
1850 
1851  // If the cycle contains LogLen-1, move it to the front of the cycle.
1852  // Otherwise, return the cycle unchanged.
1853  auto canonicalize = [LogLen](const CycleType &C) -> CycleType {
1854  unsigned LogPos, N = C.size();
1855  for (LogPos = 0; LogPos != N; ++LogPos)
1856  if (C[LogPos] == LogLen-1)
1857  break;
1858  if (LogPos == N)
1859  return C;
1860 
1861  CycleType NewC(C.begin()+LogPos, C.end());
1862  NewC.append(C.begin(), C.begin()+LogPos);
1863  return NewC;
1864  };
1865 
1866  auto pfs = [](const std::set<CycleType> &Cs, unsigned Len) {
1867  // Ordering: shuff: 5 0 1 2 3 4, deal: 5 4 3 2 1 0 (for Log=6),
1868  // for bytes zero is included, for halfwords is not.
1869  if (Cs.size() != 1)
1870  return 0u;
1871  const CycleType &C = *Cs.begin();
1872  if (C[0] != Len-1)
1873  return 0u;
1874  int D = Len - C.size();
1875  if (D != 0 && D != 1)
1876  return 0u;
1877 
1878  bool IsDeal = true, IsShuff = true;
1879  for (unsigned I = 1; I != Len-D; ++I) {
1880  if (C[I] != Len-1-I)
1881  IsDeal = false;
1882  if (C[I] != I-(1-D)) // I-1, I
1883  IsShuff = false;
1884  }
1885  // At most one, IsDeal or IsShuff, can be non-zero.
1886  assert(!(IsDeal || IsShuff) || IsDeal != IsShuff);
1887  static unsigned Deals[] = { Hexagon::V6_vdealb, Hexagon::V6_vdealh };
1888  static unsigned Shufs[] = { Hexagon::V6_vshuffb, Hexagon::V6_vshuffh };
1889  return IsDeal ? Deals[D] : (IsShuff ? Shufs[D] : 0);
1890  };
1891 
1892  while (!All.empty()) {
1893  unsigned A = *All.begin();
1894  All.erase(A);
1895  CycleType C;
1896  C.push_back(A);
1897  for (unsigned B = Perm[A]; B != A; B = Perm[B]) {
1898  C.push_back(B);
1899  All.erase(B);
1900  }
1901  if (C.size() <= 1)
1902  continue;
1903  Cycles.insert(canonicalize(C));
1904  }
1905 
1906  MVT SingleTy = getSingleVT(MVT::i8);
1908 
1909  // Recognize patterns for V6_vdeal{b,h} and V6_vshuff{b,h}.
1910  if (unsigned(VecLen) == HwLen) {
1911  if (unsigned SingleOpc = pfs(Cycles, LogLen)) {
1912  Results.push(SingleOpc, SingleTy, {Va});
1913  return OpRef::res(Results.top());
1914  }
1915  }
1916 
1917  // From the cycles, construct the sequence of values that will
1918  // then form the control values for vdealvdd/vshuffvdd, i.e.
1919  // (M a1 a2)(M a3 a4 a5)... -> a1 a2 a3 a4 a5
1920  // This essentially strips the M value from the cycles where
1921  // it's present, and performs the insertion of M (then stripping)
1922  // for cycles without M (as described in an earlier comment).
1923  SmallVector<unsigned,8> SwapElems;
1924  // When the input is extended (i.e. single vector becomes a pair),
1925  // this is done by using an "undef" vector as the second input.
1926  // However, then we get
1927  // input 1: GOODBITS
1928  // input 2: ........
1929  // but we need
1930  // input 1: ....BITS
1931  // input 2: ....GOOD
1932  // Then at the end, this needs to be undone. To accomplish this,
1933  // artificially add "LogLen-1" at both ends of the sequence.
1934  if (!HavePairs)
1935  SwapElems.push_back(LogLen-1);
1936  for (const CycleType &C : Cycles) {
1937  // Do the transformation: (a1..an) -> (M a1..an)(M a1).
1938  unsigned First = (C[0] == LogLen-1) ? 1 : 0;
1939  SwapElems.append(C.begin()+First, C.end());
1940  if (First == 0)
1941  SwapElems.push_back(C[0]);
1942  }
1943  if (!HavePairs)
1944  SwapElems.push_back(LogLen-1);
1945 
1946  const SDLoc &dl(Results.InpNode);
1947  OpRef Arg = HavePairs ? Va
1948  : concat(Va, OpRef::undef(SingleTy), Results);
1949  if (InvertedPair)
1950  Arg = concat(OpRef::hi(Arg), OpRef::lo(Arg), Results);
1951 
1952  for (unsigned I = 0, E = SwapElems.size(); I != E; ) {
1953  bool IsInc = I == E-1 || SwapElems[I] < SwapElems[I+1];
1954  unsigned S = (1u << SwapElems[I]);
1955  if (I < E-1) {
1956  while (++I < E-1 && IsInc == (SwapElems[I] < SwapElems[I+1]))
1957  S |= 1u << SwapElems[I];
1958  // The above loop will not add a bit for the final SwapElems[I+1],
1959  // so add it here.
1960  S |= 1u << SwapElems[I];
1961  }
1962  ++I;
1963 
1964  NodeTemplate Res;
1965  Results.push(Hexagon::A2_tfrsi, MVT::i32,
1966  { DAG.getTargetConstant(S, dl, MVT::i32) });
1967  Res.Opc = IsInc ? Hexagon::V6_vshuffvdd : Hexagon::V6_vdealvdd;
1968  Res.Ty = PairTy;
1969  Res.Ops = { OpRef::hi(Arg), OpRef::lo(Arg), OpRef::res(-1) };
1970  Results.push(Res);
1971  Arg = OpRef::res(Results.top());
1972  }
1973 
1974  return HavePairs ? Arg : OpRef::lo(Arg);
1975 }
1976 
1977 OpRef HvxSelector::butterfly(ShuffleMask SM, OpRef Va, ResultStack &Results) {
1978  DEBUG_WITH_TYPE("isel", {dbgs() << __func__ << '\n';});
1979  // Butterfly shuffles.
1980  //
1981  // V6_vdelta
1982  // V6_vrdelta
1983  // V6_vror
1984 
1985  // The assumption here is that all elements picked by Mask are in the
1986  // first operand to the vector_shuffle. This assumption is enforced
1987  // by the caller.
1988 
1989  MVT ResTy = getSingleVT(MVT::i8);
1990  PermNetwork::Controls FC, RC;
1991  const SDLoc &dl(Results.InpNode);
1992  int VecLen = SM.Mask.size();
1993 
1994  for (int M : SM.Mask) {
1995  if (M != -1 && M >= VecLen)
1996  return OpRef::fail();
1997  }
1998 
1999  // Try the deltas/benes for both single vectors and vector pairs.
2000  ForwardDeltaNetwork FN(SM.Mask);
2001  if (FN.run(FC)) {
2002  SDValue Ctl = getVectorConstant(FC, dl);
2003  Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(Ctl)});
2004  return OpRef::res(Results.top());
2005  }
2006 
2007  // Try reverse delta.
2008  ReverseDeltaNetwork RN(SM.Mask);
2009  if (RN.run(RC)) {
2010  SDValue Ctl = getVectorConstant(RC, dl);
2011  Results.push(Hexagon::V6_vrdelta, ResTy, {Va, OpRef(Ctl)});
2012  return OpRef::res(Results.top());
2013  }
2014 
2015  // Do Benes.
2016  BenesNetwork BN(SM.Mask);
2017  if (BN.run(FC, RC)) {
2018  SDValue CtlF = getVectorConstant(FC, dl);
2019  SDValue CtlR = getVectorConstant(RC, dl);
2020  Results.push(Hexagon::V6_vdelta, ResTy, {Va, OpRef(CtlF)});
2021  Results.push(Hexagon::V6_vrdelta, ResTy,
2022  {OpRef::res(-1), OpRef(CtlR)});
2023  return OpRef::res(Results.top());
2024  }
2025 
2026  return OpRef::fail();
2027 }
2028 
2029 SDValue HvxSelector::getVectorConstant(ArrayRef<uint8_t> Data,
2030  const SDLoc &dl) {
2032  for (uint8_t C : Data)
2033  Elems.push_back(DAG.getConstant(C, dl, MVT::i8));
2034  MVT VecTy = MVT::getVectorVT(MVT::i8, Data.size());
2035  SDValue BV = DAG.getBuildVector(VecTy, dl, Elems);
2036  SDValue LV = Lower.LowerOperation(BV, DAG);
2037  DAG.RemoveDeadNode(BV.getNode());
2038  return DAG.getNode(HexagonISD::ISEL, dl, VecTy, LV);
2039 }
2040 
2042  DEBUG_WITH_TYPE("isel", {
2043  dbgs() << "Starting " << __func__ << " on node:\n";
2044  N->dump(&DAG);
2045  });
2046  MVT ResTy = N->getValueType(0).getSimpleVT();
2047  // Assume that vector shuffles operate on vectors of bytes.
2048  assert(ResTy.isVector() && ResTy.getVectorElementType() == MVT::i8);
2049 
2050  auto *SN = cast<ShuffleVectorSDNode>(N);
2051  std::vector<int> Mask(SN->getMask().begin(), SN->getMask().end());
2052  // This shouldn't really be necessary. Is it?
2053  for (int &Idx : Mask)
2054  if (Idx != -1 && Idx < 0)
2055  Idx = -1;
2056 
2057  unsigned VecLen = Mask.size();
2058  bool HavePairs = (2*HwLen == VecLen);
2059  assert(ResTy.getSizeInBits() / 8 == VecLen);
2060 
2061  // Vd = vector_shuffle Va, Vb, Mask
2062  //
2063 
2064  bool UseLeft = false, UseRight = false;
2065  for (unsigned I = 0; I != VecLen; ++I) {
2066  if (Mask[I] == -1)
2067  continue;
2068  unsigned Idx = Mask[I];
2069  assert(Idx < 2*VecLen);
2070  if (Idx < VecLen)
2071  UseLeft = true;
2072  else
2073  UseRight = true;
2074  }
2075 
2076  DEBUG_WITH_TYPE("isel", {
2077  dbgs() << "VecLen=" << VecLen << " HwLen=" << HwLen << " UseLeft="
2078  << UseLeft << " UseRight=" << UseRight << " HavePairs="
2079  << HavePairs << '\n';
2080  });
2081  // If the mask is all -1's, generate "undef".
2082  if (!UseLeft && !UseRight) {
2083  ISel.ReplaceNode(N, ISel.selectUndef(SDLoc(SN), ResTy).getNode());
2084  return;
2085  }
2086 
2087  SDValue Vec0 = N->getOperand(0);
2088  SDValue Vec1 = N->getOperand(1);
2089  ResultStack Results(SN);
2090  Results.push(TargetOpcode::COPY, ResTy, {Vec0});
2091  Results.push(TargetOpcode::COPY, ResTy, {Vec1});
2092  OpRef Va = OpRef::res(Results.top()-1);
2093  OpRef Vb = OpRef::res(Results.top());
2094 
2095  OpRef Res = !HavePairs ? shuffs2(ShuffleMask(Mask), Va, Vb, Results)
2096  : shuffp2(ShuffleMask(Mask), Va, Vb, Results);
2097 
2098  bool Done = Res.isValid();
2099  if (Done) {
2100  // Make sure that Res is on the stack before materializing.
2101  Results.push(TargetOpcode::COPY, ResTy, {Res});
2102  materialize(Results);
2103  } else {
2104  Done = scalarizeShuffle(Mask, SDLoc(N), ResTy, Vec0, Vec1, N);
2105  }
2106 
2107  if (!Done) {
2108 #ifndef NDEBUG
2109  dbgs() << "Unhandled shuffle:\n";
2110  SN->dumpr(&DAG);
2111 #endif
2112  llvm_unreachable("Failed to select vector shuffle");
2113  }
2114 }
2115 
2117  // If this is a rotation by less than 8, use V6_valignbi.
2118  MVT Ty = N->getValueType(0).getSimpleVT();
2119  const SDLoc &dl(N);
2120  SDValue VecV = N->getOperand(0);
2121  SDValue RotV = N->getOperand(1);
2122  SDNode *NewN = nullptr;
2123 
2124  if (auto *CN = dyn_cast<ConstantSDNode>(RotV.getNode())) {
2125  unsigned S = CN->getZExtValue() % HST.getVectorLength();
2126  if (S == 0) {
2127  NewN = VecV.getNode();
2128  } else if (isUInt<3>(S)) {
2130  NewN = DAG.getMachineNode(Hexagon::V6_valignbi, dl, Ty,
2131  {VecV, VecV, C});
2132  }
2133  }
2134 
2135  if (!NewN)
2136  NewN = DAG.getMachineNode(Hexagon::V6_vror, dl, Ty, {VecV, RotV});
2137 
2138  ISel.ReplaceNode(N, NewN);
2139 }
2140 
2142  SDValue Vv = N->getOperand(0);
2143  SDValue Vu = N->getOperand(1);
2144  SDValue Rt = N->getOperand(2);
2145  SDNode *NewN = DAG.getMachineNode(Hexagon::V6_valignb, SDLoc(N),
2146  N->getValueType(0), {Vv, Vu, Rt});
2147  ISel.ReplaceNode(N, NewN);
2148  DAG.RemoveDeadNode(N);
2149 }
2150 
2151 void HexagonDAGToDAGISel::SelectHvxShuffle(SDNode *N) {
2152  HvxSelector(*this, *CurDAG).selectShuffle(N);
2153 }
2154 
2155 void HexagonDAGToDAGISel::SelectHvxRor(SDNode *N) {
2156  HvxSelector(*this, *CurDAG).selectRor(N);
2157 }
2158 
2159 void HexagonDAGToDAGISel::SelectHvxVAlign(SDNode *N) {
2160  HvxSelector(*this, *CurDAG).selectVAlign(N);
2161 }
2162 
2164  const SDLoc &dl(N);
2165  SDValue Chain = N->getOperand(0);
2166  SDValue Address = N->getOperand(2);
2167  SDValue Predicate = N->getOperand(3);
2168  SDValue Base = N->getOperand(4);
2169  SDValue Modifier = N->getOperand(5);
2170  SDValue Offset = N->getOperand(6);
2171 
2172  unsigned Opcode;
2173  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2174  switch (IntNo) {
2175  default:
2176  llvm_unreachable("Unexpected HVX gather intrinsic.");
2177  case Intrinsic::hexagon_V6_vgathermhq:
2178  case Intrinsic::hexagon_V6_vgathermhq_128B:
2179  Opcode = Hexagon::V6_vgathermhq_pseudo;
2180  break;
2181  case Intrinsic::hexagon_V6_vgathermwq:
2182  case Intrinsic::hexagon_V6_vgathermwq_128B:
2183  Opcode = Hexagon::V6_vgathermwq_pseudo;
2184  break;
2185  case Intrinsic::hexagon_V6_vgathermhwq:
2186  case Intrinsic::hexagon_V6_vgathermhwq_128B:
2187  Opcode = Hexagon::V6_vgathermhwq_pseudo;
2188  break;
2189  }
2190 
2191  SDVTList VTs = CurDAG->getVTList(MVT::Other);
2192  SDValue Ops[] = { Address, Predicate, Base, Modifier, Offset, Chain };
2193  SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2194 
2195  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2196  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2197 
2198  ReplaceNode(N, Result);
2199 }
2200 
2202  const SDLoc &dl(N);
2203  SDValue Chain = N->getOperand(0);
2204  SDValue Address = N->getOperand(2);
2205  SDValue Base = N->getOperand(3);
2206  SDValue Modifier = N->getOperand(4);
2207  SDValue Offset = N->getOperand(5);
2208 
2209  unsigned Opcode;
2210  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
2211  switch (IntNo) {
2212  default:
2213  llvm_unreachable("Unexpected HVX gather intrinsic.");
2214  case Intrinsic::hexagon_V6_vgathermh:
2215  case Intrinsic::hexagon_V6_vgathermh_128B:
2216  Opcode = Hexagon::V6_vgathermh_pseudo;
2217  break;
2218  case Intrinsic::hexagon_V6_vgathermw:
2219  case Intrinsic::hexagon_V6_vgathermw_128B:
2220  Opcode = Hexagon::V6_vgathermw_pseudo;
2221  break;
2222  case Intrinsic::hexagon_V6_vgathermhw:
2223  case Intrinsic::hexagon_V6_vgathermhw_128B:
2224  Opcode = Hexagon::V6_vgathermhw_pseudo;
2225  break;
2226  }
2227 
2228  SDVTList VTs = CurDAG->getVTList(MVT::Other);
2229  SDValue Ops[] = { Address, Base, Modifier, Offset, Chain };
2230  SDNode *Result = CurDAG->getMachineNode(Opcode, dl, VTs, Ops);
2231 
2232  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
2233  CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
2234 
2235  ReplaceNode(N, Result);
2236 }
2237 
2239  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
2240  SDNode *Result;
2241  switch (IID) {
2242  case Intrinsic::hexagon_V6_vaddcarry: {
2243  std::array<SDValue, 3> Ops = {
2244  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2245  SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2246  Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2247  break;
2248  }
2249  case Intrinsic::hexagon_V6_vaddcarry_128B: {
2250  std::array<SDValue, 3> Ops = {
2251  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2252  SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2253  Result = CurDAG->getMachineNode(Hexagon::V6_vaddcarry, SDLoc(N), VTs, Ops);
2254  break;
2255  }
2256  case Intrinsic::hexagon_V6_vsubcarry: {
2257  std::array<SDValue, 3> Ops = {
2258  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2259  SDVTList VTs = CurDAG->getVTList(MVT::v16i32, MVT::v64i1);
2260  Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2261  break;
2262  }
2263  case Intrinsic::hexagon_V6_vsubcarry_128B: {
2264  std::array<SDValue, 3> Ops = {
2265  {N->getOperand(1), N->getOperand(2), N->getOperand(3)}};
2266  SDVTList VTs = CurDAG->getVTList(MVT::v32i32, MVT::v128i1);
2267  Result = CurDAG->getMachineNode(Hexagon::V6_vsubcarry, SDLoc(N), VTs, Ops);
2268  break;
2269  }
2270  default:
2271  llvm_unreachable("Unexpected HVX dual output intrinsic.");
2272  }
2273  ReplaceUses(N, Result);
2274  ReplaceUses(SDValue(N, 0), SDValue(Result, 0));
2275  ReplaceUses(SDValue(N, 1), SDValue(Result, 1));
2276  CurDAG->RemoveDeadNode(N);
2277 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::HexagonSubtarget::getVectorLength
unsigned getVectorLength() const
Definition: HexagonSubtarget.h:273
llvm::MVT::getVectorElementType
MVT getVectorElementType() const
Definition: MachineValueType.h:501
B1
LLVM_DUMP_METHOD
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:499
llvm
Definition: AllocatorList.h:23
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::MVT::v128i1
@ v128i1
Definition: MachineValueType.h:68
Pass
print lazy value Lazy Value Info Printer Pass
Definition: LazyValueInfo.cpp:1947
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1078
llvm::HvxSelector::Lower
const HexagonTargetLowering & Lower
Definition: HexagonISelDAGToDAGHVX.cpp:812
print
static void print(raw_ostream &Out, object::Archive::Kind Kind, T Val)
Definition: ArchiveWriter.cpp:147
llvm::NodeSet::end
iterator end()
Definition: MachinePipeliner.h:422
llvm::Function::empty
bool empty() const
Definition: Function.h:784
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::Function::print
void print(raw_ostream &OS, AssemblyAnnotationWriter *AAW=nullptr, bool ShouldPreserveUseListOrder=false, bool IsForDebug=false) const
Print the function to an output stream with an optional AssemblyAnnotationWriter.
Definition: AsmWriter.cpp:4509
llvm::ISD::CONCAT_VECTORS
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:513
P
This currently compiles esp xmm0 movsd esp eax eax esp ret We should use not the dag combiner This is because dagcombine2 needs to be able to see through the X86ISD::Wrapper which DAGCombine can t really do The code for turning x load into a single vector load is target independent and should be moved to the dag combiner The code for turning x load into a vector load can only handle a direct load from a global or a direct load from the stack It should be generalized to handle any load from P
Definition: README-SSE.txt:411
isIdentity
static bool isIdentity(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:911
getHexagonLowering
static const HexagonTargetLowering & getHexagonLowering(SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:803
llvm::SetVector::size
size_type size() const
Determine the number of elements in the SetVector.
Definition: SetVector.h:77
llvm::HvxSelector::HwLen
const unsigned HwLen
Definition: HexagonISelDAGToDAGHVX.cpp:816
Ignore
ReachingDefAnalysis InstSet InstSet & Ignore
Definition: ARMLowOverheadLoops.cpp:540
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::RISCVFenceField::W
@ W
Definition: RISCVBaseInfo.h:134
llvm::HexagonTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: HexagonISelLowering.cpp:3075
llvm::HvxSelector::selectRor
void selectRor(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2116
PairTy
std::pair< MCSymbol *, MachineModuleInfoImpl::StubValueTy > PairTy
Definition: MachineModuleInfoImpls.cpp:29
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::HvxSelector::selectShuffle
void selectShuffle(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2041
R600_InstFlag::FC
@ FC
Definition: R600Defines.h:32
push
static void push(SmallVectorImpl< uint64_t > &R, StringRef Str)
Definition: BitstreamRemarkSerializer.cpp:23
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::MachO::Invalid
@ Invalid
Invalid file type.
Definition: InterfaceFile.h:59
llvm::MemOp
Definition: TargetLowering.h:109
llvm::coro::ABI::Switch
@ Switch
The "resume-switch" lowering, where there are separate resume and destroy functions that are shared b...
llvm::reverse
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:329
llvm::sys::path::end
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:233
llvm::MachineMemOperand
A description of a memory reference used in the backend.
Definition: MachineMemOperand.h:127
llvm::sys::path::begin
const_iterator begin(StringRef path, Style style=Style::native)
Get begin iterator over path.
Definition: Path.cpp:224
T
#define T
Definition: Mips16ISelLowering.cpp:341
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:851
llvm::HexagonDAGToDAGISel
Definition: HexagonISelDAGToDAG.h:29
llvm::SelectionDAG::RemoveDeadNodes
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
Definition: SelectionDAG.cpp:801
llvm::dump
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Definition: SparseBitVector.h:876
HexagonTargetMachine.h
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
getHexagonSubtarget
static const HexagonSubtarget & getHexagonSubtarget(SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:806
LLVM_ATTRIBUTE_UNUSED
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:188
BaseType
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
Coloring
Stack Slot Coloring
Definition: StackSlotColoring.cpp:140
llvm::Data
@ Data
Definition: SIMachineScheduler.h:56
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::AMDGPU::HSAMD::ValueKind::Queue
@ Queue
llvm::ArrayRef::data
const T * data() const
Definition: ArrayRef.h:160
F
#define F(x, y, z)
Definition: MD5.cpp:56
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:133
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:904
HexagonISelDAGToDAG.h
llvm::MVT::SimpleValueType
SimpleValueType
Definition: MachineValueType.h:32
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:205
CommandLine.h
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:40
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1482
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:621
DEBUG_WITH_TYPE
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
llvm::TargetInstrInfo
TargetInstrInfo - Interface to description of machine instruction set.
Definition: TargetInstrInfo.h:97
llvm::MutableArrayRef
MutableArrayRef - Represent a mutable reference to an array (0 or more elements consecutively in memo...
Definition: ArrayRef.h:305
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::HexagonISD::ISEL
@ ISEL
Definition: HexagonISelLowering.h:95
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1347
int
Clang compiles this i1 i64 store i64 i64 store i64 i64 store i64 i64 store i64 align Which gets codegen d xmm0 movaps rbp movaps rbp movaps rbp movaps rbp rbp rbp rbp rbp It would be better to have movq s of instead of the movaps s LLVM produces ret int
Definition: README.txt:536
llvm::MVT::SimpleTy
SimpleValueType SimpleTy
Definition: MachineValueType.h:304
llvm::MVT::v64i1
@ v64i1
Definition: MachineValueType.h:67
llvm::AArch64CC::HS
@ HS
Definition: AArch64BaseInfo.h:238
llvm::SelectionDAGISel::ReplaceNode
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
Definition: SelectionDAGISel.h:227
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:596
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
First
into llvm powi allowing the code generator to produce balanced multiplication trees First
Definition: README.txt:54
llvm::SDNode::uses
iterator_range< use_iterator > uses()
Definition: SelectionDAGNodes.h:781
llvm::SelectionDAG::DAGNodeDeletedListener
Definition: SelectionDAG.h:314
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::BitVector
Definition: BitVector.h:74
llvm::NodeSet
A NodeSet contains a set of SUnit DAG nodes with additional information that assigns a priority to th...
Definition: MachinePipeliner.h:311
llvm::None
const NoneType None
Definition: None.h:23
llvm::TargetLoweringBase::getTypeToTransformTo
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const
For types supported by the target, this is an identity function.
Definition: TargetLowering.h:929
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Hexagon.h
llvm::HvxSelector
Definition: HexagonISelDAGToDAGHVX.cpp:811
llvm::NodeSet::begin
iterator begin()
Definition: MachinePipeliner.h:421
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::RISCVFenceField::O
@ O
Definition: RISCVBaseInfo.h:132
llvm::PPC::Predicate
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
llvm::SignExtend32
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:761
llvm::SelectionDAG::RemoveDeadNode
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Definition: SelectionDAG.cpp:855
llvm::countPopulation
unsigned countPopulation(T Value)
Count the number of set bits in a value.
Definition: MathExtras.h:567
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:202
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::HexagonDAGToDAGISel::SelectV65GatherPred
void SelectV65GatherPred(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2163
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap
Definition: DenseMap.h:714
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:505
fail
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg)
Definition: BPFISelLowering.cpp:38
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:896
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:7818
size
i< reg-> size
Definition: README.txt:166
llvm::HighlightColor::Address
@ Address
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MVT::getVectorNumElements
unsigned getVectorNumElements() const
Definition: MachineValueType.h:654
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:41
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::HvxSelector::HST
const HexagonSubtarget & HST
Definition: HexagonISelDAGToDAGHVX.cpp:815
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:39
llvm::MVT::getSizeInBits
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
Definition: MachineValueType.h:821
llvm::isUInt< 8 >
constexpr bool isUInt< 8 >(uint64_t x)
Definition: MathExtras.h:405
memcpy
<%struct.s * > cast struct s *S to sbyte *< sbyte * > sbyte uint cast struct s *agg result to sbyte *< sbyte * > sbyte uint cast struct s *memtmp to sbyte *< sbyte * > sbyte uint ret void llc ends up issuing two memcpy or custom lower memcpy(of small size) to be ldmia/stmia. I think option 2 is better but the current register allocator cannot allocate a chunk of registers at a time. A feasible temporary solution is to use specific physical registers at the lowering time for small(<
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8491
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:30
llvm::SDNode::dumpr
void dumpr() const
Dump (recursively) this node and its use-def subgraph.
Definition: SelectionDAGDumper.cpp:992
HexagonISelLowering.h
llvm::SDNode::isMachineOpcode
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode.
Definition: SelectionDAGNodes.h:673
llvm::SetVector::insert
bool insert(const value_type &X)
Insert a new element into the SetVector.
Definition: SetVector.h:141
llvm::MVT::getVectorVT
static MVT getVectorVT(MVT VT, unsigned NumElements)
Definition: MachineValueType.h:1127
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1463
SelectionDAGISel.h
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::HexagonTargetLowering
Definition: HexagonISelLowering.h:105
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::format
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
llvm::HvxSelector::ISel
HexagonDAGToDAGISel & ISel
Definition: HexagonISelDAGToDAGHVX.cpp:813
llvm::MutableArrayRef::data
T * data() const
Definition: ArrayRef.h:352
llvm::SDVTList
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
Definition: SelectionDAGNodes.h:79
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:101
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::object::Comma
@ Comma
Definition: COFFModuleDefinition.cpp:37
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:100
llvm::MachO::All
@ All
Definition: InterfaceFile.h:73
j
return j(j<< 16)
llvm::MipsISD::Mult
@ Mult
Definition: MipsISelLowering.h:132
findStrip
static std::pair< int, unsigned > findStrip(ArrayRef< int > A, int Inc, unsigned MaxLen)
Definition: HexagonISelDAGToDAGHVX.cpp:891
llvm::empty
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:254
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::HvxSelector::getPairVT
MVT getPairVT(MVT ElemTy) const
Definition: HexagonISelDAGToDAGHVX.cpp:827
llvm::DenseMapBase< DenseMap< KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >, KeyT, ValueT, DenseMapInfo< KeyT >, llvm::detail::DenseMapPair< KeyT, ValueT > >::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::ArrayRef::take_front
ArrayRef< T > take_front(size_t N=1) const
Return a copy of *this with only the first N elements.
Definition: ArrayRef.h:226
llvm::HexagonDAGToDAGISel::SelectV65Gather
void SelectV65Gather(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2201
llvm::c_str
SmallVectorImpl< T >::const_pointer c_str(SmallVectorImpl< T > &str)
Definition: WindowsSupport.h:193
llvm::SelectionDAG::getTargetExtractSubreg
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
Definition: SelectionDAG.cpp:8609
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:43
llvm::SetVector::count
size_type count(const key_type &key) const
Count the number of elements of a given key in the SetVector.
Definition: SetVector.h:215
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
isPermutation
static bool isPermutation(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:920
Predicate
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
isValid
static bool isValid(const char C)
Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
Definition: RustDemangle.cpp:88
llvm::HvxSelector::selectVAlign
void selectVAlign(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2141
MachineInstrBuilder.h
llvm::HvxSelector::DAG
SelectionDAG & DAG
Definition: HexagonISelDAGToDAGHVX.cpp:814
List
const NodeList & List
Definition: RDFGraph.cpp:201
N
#define N
splitMask
static void splitMask(ArrayRef< int > Mask, MutableArrayRef< int > MaskL, MutableArrayRef< int > MaskR)
Definition: HexagonISelDAGToDAGHVX.cpp:873
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
llvm::HvxSelector::HvxSelector
HvxSelector(HexagonDAGToDAGISel &HS, SelectionDAG &G)
Definition: HexagonISelDAGToDAGHVX.cpp:818
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:42
llvm::HexagonDAGToDAGISel::Select
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
Definition: HexagonISelDAGToDAG.cpp:883
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::NodeSet::insert
bool insert(SUnit *SU)
Definition: MachinePipeliner.h:345
RN
It looks like we only need to define PPCfmarto for these because according to these instructions perform RTO on fma s src2 rnd ← FPSCR RN
Definition: README_P9.txt:262
llvm::HvxSelector::getSingleVT
MVT getSingleVT(MVT ElemTy) const
Definition: HexagonISelDAGToDAGHVX.cpp:822
llvm::SetVector
A vector that has set insertion semantics.
Definition: SetVector.h:40
Debug.h
SetVector.h
llvm::HexagonDAGToDAGISel::SelectHVXDualOutput
void SelectHVXDualOutput(SDNode *N)
Definition: HexagonISelDAGToDAGHVX.cpp:2238
llvm::Function::size
size_t size() const
Definition: Function.h:783
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:281
llvm::irsymtab::build
Error build(ArrayRef< Module * > Mods, SmallVector< char, 0 > &Symtab, StringTableBuilder &StrtabBuilder, BumpPtrAllocator &Alloc)
Fills in Symtab and StrtabBuilder with a valid symbol and string table for Mods.
Definition: IRSymtab.cpp:351