LLVM  9.0.0svn
HexagonISelDAGToDAG.cpp
Go to the documentation of this file.
1 //===-- HexagonISelDAGToDAG.cpp - A dag to dag inst selector for Hexagon --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the Hexagon target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "Hexagon.h"
14 #include "HexagonISelDAGToDAG.h"
15 #include "HexagonISelLowering.h"
17 #include "HexagonTargetMachine.h"
21 #include "llvm/IR/Intrinsics.h"
23 #include "llvm/Support/Debug.h"
24 using namespace llvm;
25 
26 #define DEBUG_TYPE "hexagon-isel"
27 
28 static
30 EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true),
31  cl::desc("Rebalance address calculation trees to improve "
32  "instruction selection"));
33 
34 // Rebalance only if this allows e.g. combining a GA with an offset or
35 // factoring out a shift.
36 static
38 RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false),
39  cl::desc("Rebalance address tree only if this allows optimizations"));
40 
41 static
43 RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden,
44  cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"));
45 
46 static cl::opt<bool> CheckSingleUse("hexagon-isel-su", cl::Hidden,
47  cl::init(true), cl::desc("Enable checking of SDNode's single-use status"));
48 
49 //===----------------------------------------------------------------------===//
50 // Instruction Selector Implementation
51 //===----------------------------------------------------------------------===//
52 
53 #define GET_DAGISEL_BODY HexagonDAGToDAGISel
54 #include "HexagonGenDAGISel.inc"
55 
56 /// createHexagonISelDag - This pass converts a legalized DAG into a
57 /// Hexagon-specific DAG, ready for instruction scheduling.
58 ///
59 namespace llvm {
61  CodeGenOpt::Level OptLevel) {
62  return new HexagonDAGToDAGISel(TM, OptLevel);
63 }
64 }
65 
67  SDValue Chain = LD->getChain();
68  SDValue Base = LD->getBasePtr();
69  SDValue Offset = LD->getOffset();
70  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
71  EVT LoadedVT = LD->getMemoryVT();
72  unsigned Opcode = 0;
73 
74  // Check for zero extended loads. Treat any-extend loads as zero extended
75  // loads.
77  bool IsZeroExt = (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD);
78  bool IsValidInc = HII->isValidAutoIncImm(LoadedVT, Inc);
79 
80  assert(LoadedVT.isSimple());
81  switch (LoadedVT.getSimpleVT().SimpleTy) {
82  case MVT::i8:
83  if (IsZeroExt)
84  Opcode = IsValidInc ? Hexagon::L2_loadrub_pi : Hexagon::L2_loadrub_io;
85  else
86  Opcode = IsValidInc ? Hexagon::L2_loadrb_pi : Hexagon::L2_loadrb_io;
87  break;
88  case MVT::i16:
89  if (IsZeroExt)
90  Opcode = IsValidInc ? Hexagon::L2_loadruh_pi : Hexagon::L2_loadruh_io;
91  else
92  Opcode = IsValidInc ? Hexagon::L2_loadrh_pi : Hexagon::L2_loadrh_io;
93  break;
94  case MVT::i32:
95  case MVT::f32:
96  case MVT::v2i16:
97  case MVT::v4i8:
98  Opcode = IsValidInc ? Hexagon::L2_loadri_pi : Hexagon::L2_loadri_io;
99  break;
100  case MVT::i64:
101  case MVT::f64:
102  case MVT::v2i32:
103  case MVT::v4i16:
104  case MVT::v8i8:
105  Opcode = IsValidInc ? Hexagon::L2_loadrd_pi : Hexagon::L2_loadrd_io;
106  break;
107  case MVT::v64i8:
108  case MVT::v32i16:
109  case MVT::v16i32:
110  case MVT::v8i64:
111  case MVT::v128i8:
112  case MVT::v64i16:
113  case MVT::v32i32:
114  case MVT::v16i64:
115  if (isAlignedMemNode(LD)) {
116  if (LD->isNonTemporal())
117  Opcode = IsValidInc ? Hexagon::V6_vL32b_nt_pi : Hexagon::V6_vL32b_nt_ai;
118  else
119  Opcode = IsValidInc ? Hexagon::V6_vL32b_pi : Hexagon::V6_vL32b_ai;
120  } else {
121  Opcode = IsValidInc ? Hexagon::V6_vL32Ub_pi : Hexagon::V6_vL32Ub_ai;
122  }
123  break;
124  default:
125  llvm_unreachable("Unexpected memory type in indexed load");
126  }
127 
128  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
129  MachineMemOperand *MemOp = LD->getMemOperand();
130 
131  auto getExt64 = [this,ExtType] (MachineSDNode *N, const SDLoc &dl)
132  -> MachineSDNode* {
133  if (ExtType == ISD::ZEXTLOAD || ExtType == ISD::EXTLOAD) {
134  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
135  return CurDAG->getMachineNode(Hexagon::A4_combineir, dl, MVT::i64,
136  Zero, SDValue(N, 0));
137  }
138  if (ExtType == ISD::SEXTLOAD)
139  return CurDAG->getMachineNode(Hexagon::A2_sxtw, dl, MVT::i64,
140  SDValue(N, 0));
141  return N;
142  };
143 
144  // Loaded value Next address Chain
145  SDValue From[3] = { SDValue(LD,0), SDValue(LD,1), SDValue(LD,2) };
146  SDValue To[3];
147 
148  EVT ValueVT = LD->getValueType(0);
149  if (ValueVT == MVT::i64 && ExtType != ISD::NON_EXTLOAD) {
150  // A load extending to i64 will actually produce i32, which will then
151  // need to be extended to i64.
152  assert(LoadedVT.getSizeInBits() <= 32);
153  ValueVT = MVT::i32;
154  }
155 
156  if (IsValidInc) {
157  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT,
158  MVT::i32, MVT::Other, Base,
159  IncV, Chain);
160  CurDAG->setNodeMemRefs(L, {MemOp});
161  To[1] = SDValue(L, 1); // Next address.
162  To[2] = SDValue(L, 2); // Chain.
163  // Handle special case for extension to i64.
164  if (LD->getValueType(0) == MVT::i64)
165  L = getExt64(L, dl);
166  To[0] = SDValue(L, 0); // Loaded (extended) value.
167  } else {
168  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
169  MachineSDNode *L = CurDAG->getMachineNode(Opcode, dl, ValueVT, MVT::Other,
170  Base, Zero, Chain);
171  CurDAG->setNodeMemRefs(L, {MemOp});
172  To[2] = SDValue(L, 1); // Chain.
173  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
174  Base, IncV);
175  To[1] = SDValue(A, 0); // Next address.
176  // Handle special case for extension to i64.
177  if (LD->getValueType(0) == MVT::i64)
178  L = getExt64(L, dl);
179  To[0] = SDValue(L, 0); // Loaded (extended) value.
180  }
181  ReplaceUses(From, To, 3);
182  CurDAG->RemoveDeadNode(LD);
183 }
184 
186  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
187  return nullptr;
188 
189  SDLoc dl(IntN);
190  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
191 
192  static std::map<unsigned,unsigned> LoadPciMap = {
193  { Intrinsic::hexagon_circ_ldb, Hexagon::L2_loadrb_pci },
194  { Intrinsic::hexagon_circ_ldub, Hexagon::L2_loadrub_pci },
195  { Intrinsic::hexagon_circ_ldh, Hexagon::L2_loadrh_pci },
196  { Intrinsic::hexagon_circ_lduh, Hexagon::L2_loadruh_pci },
197  { Intrinsic::hexagon_circ_ldw, Hexagon::L2_loadri_pci },
198  { Intrinsic::hexagon_circ_ldd, Hexagon::L2_loadrd_pci },
199  };
200  auto FLC = LoadPciMap.find(IntNo);
201  if (FLC != LoadPciMap.end()) {
202  EVT ValTy = (IntNo == Intrinsic::hexagon_circ_ldd) ? MVT::i64 : MVT::i32;
203  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
204  // Operands: { Base, Increment, Modifier, Chain }
205  auto Inc = cast<ConstantSDNode>(IntN->getOperand(5));
206  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), dl, MVT::i32);
207  MachineSDNode *Res = CurDAG->getMachineNode(FLC->second, dl, RTys,
208  { IntN->getOperand(2), I, IntN->getOperand(4),
209  IntN->getOperand(0) });
210  return Res;
211  }
212 
213  return nullptr;
214 }
215 
217  SDNode *IntN) {
218  // The "LoadN" is just a machine load instruction. The intrinsic also
219  // involves storing it. Generate an appropriate store to the location
220  // given in the intrinsic's operand(3).
221  uint64_t F = HII->get(LoadN->getMachineOpcode()).TSFlags;
222  unsigned SizeBits = (F >> HexagonII::MemAccessSizePos) &
224  unsigned Size = 1U << (SizeBits-1);
225 
226  SDLoc dl(IntN);
228  SDValue TS;
229  SDValue Loc = IntN->getOperand(3);
230 
231  if (Size >= 4)
232  TS = CurDAG->getStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc, PI,
233  Size);
234  else
235  TS = CurDAG->getTruncStore(SDValue(LoadN, 2), dl, SDValue(LoadN, 0), Loc,
236  PI, MVT::getIntegerVT(Size * 8), Size);
237 
238  SDNode *StoreN;
239  {
240  HandleSDNode Handle(TS);
241  SelectStore(TS.getNode());
242  StoreN = Handle.getValue().getNode();
243  }
244 
245  // Load's results are { Loaded value, Updated pointer, Chain }
246  ReplaceUses(SDValue(IntN, 0), SDValue(LoadN, 1));
247  ReplaceUses(SDValue(IntN, 1), SDValue(StoreN, 0));
248  return StoreN;
249 }
250 
252  // The intrinsics for load circ/brev perform two operations:
253  // 1. Load a value V from the specified location, using the addressing
254  // mode corresponding to the intrinsic.
255  // 2. Store V into a specified location. This location is typically a
256  // local, temporary object.
257  // In many cases, the program using these intrinsics will immediately
258  // load V again from the local object. In those cases, when certain
259  // conditions are met, the last load can be removed.
260  // This function identifies and optimizes this pattern. If the pattern
261  // cannot be optimized, it returns nullptr, which will cause the load
262  // to be selected separately from the intrinsic (which will be handled
263  // in SelectIntrinsicWChain).
264 
265  SDValue Ch = N->getOperand(0);
266  SDValue Loc = N->getOperand(1);
267 
268  // Assume that the load and the intrinsic are connected directly with a
269  // chain:
270  // t1: i32,ch = int.load ..., ..., ..., Loc, ... // <-- C
271  // t2: i32,ch = load t1:1, Loc, ...
272  SDNode *C = Ch.getNode();
273 
274  if (C->getOpcode() != ISD::INTRINSIC_W_CHAIN)
275  return false;
276 
277  // The second load can only be eliminated if its extension type matches
278  // that of the load instruction corresponding to the intrinsic. The user
279  // can provide an address of an unsigned variable to store the result of
280  // a sign-extending intrinsic into (or the other way around).
281  ISD::LoadExtType IntExt;
282  switch (cast<ConstantSDNode>(C->getOperand(1))->getZExtValue()) {
283  case Intrinsic::hexagon_circ_ldub:
284  case Intrinsic::hexagon_circ_lduh:
285  IntExt = ISD::ZEXTLOAD;
286  break;
287  case Intrinsic::hexagon_circ_ldw:
288  case Intrinsic::hexagon_circ_ldd:
289  IntExt = ISD::NON_EXTLOAD;
290  break;
291  default:
292  IntExt = ISD::SEXTLOAD;
293  break;
294  }
295  if (N->getExtensionType() != IntExt)
296  return false;
297 
298  // Make sure the target location for the loaded value in the load intrinsic
299  // is the location from which LD (or N) is loading.
300  if (C->getNumOperands() < 4 || Loc.getNode() != C->getOperand(3).getNode())
301  return false;
302 
305  SDValue F[] = { SDValue(N,0), SDValue(N,1), SDValue(C,0), SDValue(C,1) };
306  SDValue T[] = { SDValue(L,0), SDValue(S,0), SDValue(L,1), SDValue(S,0) };
307  ReplaceUses(F, T, array_lengthof(T));
308  // This transformation will leave the intrinsic dead. If it remains in
309  // the DAG, the selection code will see it again, but without the load,
310  // and it will generate a store that is normally required for it.
312  return true;
313  }
314  return false;
315 }
316 
317 // Convert the bit-reverse load intrinsic to appropriate target instruction.
319  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
320  return false;
321 
322  const SDLoc &dl(IntN);
323  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
324 
325  static const std::map<unsigned, unsigned> LoadBrevMap = {
326  { Intrinsic::hexagon_L2_loadrb_pbr, Hexagon::L2_loadrb_pbr },
327  { Intrinsic::hexagon_L2_loadrub_pbr, Hexagon::L2_loadrub_pbr },
328  { Intrinsic::hexagon_L2_loadrh_pbr, Hexagon::L2_loadrh_pbr },
329  { Intrinsic::hexagon_L2_loadruh_pbr, Hexagon::L2_loadruh_pbr },
330  { Intrinsic::hexagon_L2_loadri_pbr, Hexagon::L2_loadri_pbr },
331  { Intrinsic::hexagon_L2_loadrd_pbr, Hexagon::L2_loadrd_pbr }
332  };
333  auto FLI = LoadBrevMap.find(IntNo);
334  if (FLI != LoadBrevMap.end()) {
335  EVT ValTy =
336  (IntNo == Intrinsic::hexagon_L2_loadrd_pbr) ? MVT::i64 : MVT::i32;
337  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
338  // Operands of Intrinsic: {chain, enum ID of intrinsic, baseptr,
339  // modifier}.
340  // Operands of target instruction: { Base, Modifier, Chain }.
342  FLI->second, dl, RTys,
343  {IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(0)});
344 
345  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(IntN)->getMemOperand();
346  CurDAG->setNodeMemRefs(Res, {MemOp});
347 
348  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
349  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
350  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
351  CurDAG->RemoveDeadNode(IntN);
352  return true;
353  }
354  return false;
355 }
356 
357 /// Generate a machine instruction node for the new circlar buffer intrinsics.
358 /// The new versions use a CSx register instead of the K field.
360  if (IntN->getOpcode() != ISD::INTRINSIC_W_CHAIN)
361  return false;
362 
363  SDLoc DL(IntN);
364  unsigned IntNo = cast<ConstantSDNode>(IntN->getOperand(1))->getZExtValue();
366 
367  static std::map<unsigned,unsigned> LoadNPcMap = {
368  { Intrinsic::hexagon_L2_loadrub_pci, Hexagon::PS_loadrub_pci },
369  { Intrinsic::hexagon_L2_loadrb_pci, Hexagon::PS_loadrb_pci },
370  { Intrinsic::hexagon_L2_loadruh_pci, Hexagon::PS_loadruh_pci },
371  { Intrinsic::hexagon_L2_loadrh_pci, Hexagon::PS_loadrh_pci },
372  { Intrinsic::hexagon_L2_loadri_pci, Hexagon::PS_loadri_pci },
373  { Intrinsic::hexagon_L2_loadrd_pci, Hexagon::PS_loadrd_pci },
374  { Intrinsic::hexagon_L2_loadrub_pcr, Hexagon::PS_loadrub_pcr },
375  { Intrinsic::hexagon_L2_loadrb_pcr, Hexagon::PS_loadrb_pcr },
376  { Intrinsic::hexagon_L2_loadruh_pcr, Hexagon::PS_loadruh_pcr },
377  { Intrinsic::hexagon_L2_loadrh_pcr, Hexagon::PS_loadrh_pcr },
378  { Intrinsic::hexagon_L2_loadri_pcr, Hexagon::PS_loadri_pcr },
379  { Intrinsic::hexagon_L2_loadrd_pcr, Hexagon::PS_loadrd_pcr }
380  };
381  auto FLI = LoadNPcMap.find (IntNo);
382  if (FLI != LoadNPcMap.end()) {
383  EVT ValTy = MVT::i32;
384  if (IntNo == Intrinsic::hexagon_L2_loadrd_pci ||
385  IntNo == Intrinsic::hexagon_L2_loadrd_pcr)
386  ValTy = MVT::i64;
387  EVT RTys[] = { ValTy, MVT::i32, MVT::Other };
388  // Handle load.*_pci case which has 6 operands.
389  if (IntN->getNumOperands() == 6) {
390  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
391  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
392  // Operands: { Base, Increment, Modifier, Start, Chain }.
393  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
394  IntN->getOperand(0) };
395  } else
396  // Handle load.*_pcr case which has 5 operands.
397  // Operands: { Base, Modifier, Start, Chain }.
398  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
399  IntN->getOperand(0) };
400  MachineSDNode *Res = CurDAG->getMachineNode(FLI->second, DL, RTys, Ops);
401  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
402  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
403  ReplaceUses(SDValue(IntN, 2), SDValue(Res, 2));
404  CurDAG->RemoveDeadNode(IntN);
405  return true;
406  }
407 
408  static std::map<unsigned,unsigned> StoreNPcMap = {
409  { Intrinsic::hexagon_S2_storerb_pci, Hexagon::PS_storerb_pci },
410  { Intrinsic::hexagon_S2_storerh_pci, Hexagon::PS_storerh_pci },
411  { Intrinsic::hexagon_S2_storerf_pci, Hexagon::PS_storerf_pci },
412  { Intrinsic::hexagon_S2_storeri_pci, Hexagon::PS_storeri_pci },
413  { Intrinsic::hexagon_S2_storerd_pci, Hexagon::PS_storerd_pci },
414  { Intrinsic::hexagon_S2_storerb_pcr, Hexagon::PS_storerb_pcr },
415  { Intrinsic::hexagon_S2_storerh_pcr, Hexagon::PS_storerh_pcr },
416  { Intrinsic::hexagon_S2_storerf_pcr, Hexagon::PS_storerf_pcr },
417  { Intrinsic::hexagon_S2_storeri_pcr, Hexagon::PS_storeri_pcr },
418  { Intrinsic::hexagon_S2_storerd_pcr, Hexagon::PS_storerd_pcr }
419  };
420  auto FSI = StoreNPcMap.find (IntNo);
421  if (FSI != StoreNPcMap.end()) {
422  EVT RTys[] = { MVT::i32, MVT::Other };
423  // Handle store.*_pci case which has 7 operands.
424  if (IntN->getNumOperands() == 7) {
425  auto Inc = cast<ConstantSDNode>(IntN->getOperand(3));
426  SDValue I = CurDAG->getTargetConstant(Inc->getSExtValue(), DL, MVT::i32);
427  // Operands: { Base, Increment, Modifier, Value, Start, Chain }.
428  Ops = { IntN->getOperand(2), I, IntN->getOperand(4), IntN->getOperand(5),
429  IntN->getOperand(6), IntN->getOperand(0) };
430  } else
431  // Handle store.*_pcr case which has 6 operands.
432  // Operands: { Base, Modifier, Value, Start, Chain }.
433  Ops = { IntN->getOperand(2), IntN->getOperand(3), IntN->getOperand(4),
434  IntN->getOperand(5), IntN->getOperand(0) };
435  MachineSDNode *Res = CurDAG->getMachineNode(FSI->second, DL, RTys, Ops);
436  ReplaceUses(SDValue(IntN, 0), SDValue(Res, 0));
437  ReplaceUses(SDValue(IntN, 1), SDValue(Res, 1));
438  CurDAG->RemoveDeadNode(IntN);
439  return true;
440  }
441 
442  return false;
443 }
444 
446  SDLoc dl(N);
447  LoadSDNode *LD = cast<LoadSDNode>(N);
448 
449  // Handle indexed loads.
451  if (AM != ISD::UNINDEXED) {
452  SelectIndexedLoad(LD, dl);
453  return;
454  }
455 
456  // Handle patterns using circ/brev load intrinsics.
457  if (tryLoadOfLoadIntrinsic(LD))
458  return;
459 
460  SelectCode(LD);
461 }
462 
464  SDValue Chain = ST->getChain();
465  SDValue Base = ST->getBasePtr();
466  SDValue Offset = ST->getOffset();
467  SDValue Value = ST->getValue();
468  // Get the constant value.
469  int32_t Inc = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
470  EVT StoredVT = ST->getMemoryVT();
471  EVT ValueVT = Value.getValueType();
472 
473  bool IsValidInc = HII->isValidAutoIncImm(StoredVT, Inc);
474  unsigned Opcode = 0;
475 
476  assert(StoredVT.isSimple());
477  switch (StoredVT.getSimpleVT().SimpleTy) {
478  case MVT::i8:
479  Opcode = IsValidInc ? Hexagon::S2_storerb_pi : Hexagon::S2_storerb_io;
480  break;
481  case MVT::i16:
482  Opcode = IsValidInc ? Hexagon::S2_storerh_pi : Hexagon::S2_storerh_io;
483  break;
484  case MVT::i32:
485  case MVT::f32:
486  case MVT::v2i16:
487  case MVT::v4i8:
488  Opcode = IsValidInc ? Hexagon::S2_storeri_pi : Hexagon::S2_storeri_io;
489  break;
490  case MVT::i64:
491  case MVT::f64:
492  case MVT::v2i32:
493  case MVT::v4i16:
494  case MVT::v8i8:
495  Opcode = IsValidInc ? Hexagon::S2_storerd_pi : Hexagon::S2_storerd_io;
496  break;
497  case MVT::v64i8:
498  case MVT::v32i16:
499  case MVT::v16i32:
500  case MVT::v8i64:
501  case MVT::v128i8:
502  case MVT::v64i16:
503  case MVT::v32i32:
504  case MVT::v16i64:
505  if (isAlignedMemNode(ST)) {
506  if (ST->isNonTemporal())
507  Opcode = IsValidInc ? Hexagon::V6_vS32b_nt_pi : Hexagon::V6_vS32b_nt_ai;
508  else
509  Opcode = IsValidInc ? Hexagon::V6_vS32b_pi : Hexagon::V6_vS32b_ai;
510  } else {
511  Opcode = IsValidInc ? Hexagon::V6_vS32Ub_pi : Hexagon::V6_vS32Ub_ai;
512  }
513  break;
514  default:
515  llvm_unreachable("Unexpected memory type in indexed store");
516  }
517 
518  if (ST->isTruncatingStore() && ValueVT.getSizeInBits() == 64) {
519  assert(StoredVT.getSizeInBits() < 64 && "Not a truncating store");
520  Value = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo,
521  dl, MVT::i32, Value);
522  }
523 
524  SDValue IncV = CurDAG->getTargetConstant(Inc, dl, MVT::i32);
525  MachineMemOperand *MemOp = ST->getMemOperand();
526 
527  // Next address Chain
528  SDValue From[2] = { SDValue(ST,0), SDValue(ST,1) };
529  SDValue To[2];
530 
531  if (IsValidInc) {
532  // Build post increment store.
533  SDValue Ops[] = { Base, IncV, Value, Chain };
535  Ops);
536  CurDAG->setNodeMemRefs(S, {MemOp});
537  To[0] = SDValue(S, 0);
538  To[1] = SDValue(S, 1);
539  } else {
540  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
541  SDValue Ops[] = { Base, Zero, Value, Chain };
542  MachineSDNode *S = CurDAG->getMachineNode(Opcode, dl, MVT::Other, Ops);
543  CurDAG->setNodeMemRefs(S, {MemOp});
544  To[1] = SDValue(S, 0);
545  MachineSDNode *A = CurDAG->getMachineNode(Hexagon::A2_addi, dl, MVT::i32,
546  Base, IncV);
547  To[0] = SDValue(A, 0);
548  }
549 
550  ReplaceUses(From, To, 2);
551  CurDAG->RemoveDeadNode(ST);
552 }
553 
555  SDLoc dl(N);
556  StoreSDNode *ST = cast<StoreSDNode>(N);
557 
558  // Handle indexed stores.
560  if (AM != ISD::UNINDEXED) {
561  SelectIndexedStore(ST, dl);
562  return;
563  }
564 
565  SelectCode(ST);
566 }
567 
569  SDLoc dl(N);
570  SDValue Shl_0 = N->getOperand(0);
571  SDValue Shl_1 = N->getOperand(1);
572 
573  auto Default = [this,N] () -> void { SelectCode(N); };
574 
575  if (N->getValueType(0) != MVT::i32 || Shl_1.getOpcode() != ISD::Constant)
576  return Default();
577 
578  // RHS is const.
579  int32_t ShlConst = cast<ConstantSDNode>(Shl_1)->getSExtValue();
580 
581  if (Shl_0.getOpcode() == ISD::MUL) {
582  SDValue Mul_0 = Shl_0.getOperand(0); // Val
583  SDValue Mul_1 = Shl_0.getOperand(1); // Const
584  // RHS of mul is const.
585  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Mul_1)) {
586  int32_t ValConst = C->getSExtValue() << ShlConst;
587  if (isInt<9>(ValConst)) {
588  SDValue Val = CurDAG->getTargetConstant(ValConst, dl, MVT::i32);
589  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
590  MVT::i32, Mul_0, Val);
591  ReplaceNode(N, Result);
592  return;
593  }
594  }
595  return Default();
596  }
597 
598  if (Shl_0.getOpcode() == ISD::SUB) {
599  SDValue Sub_0 = Shl_0.getOperand(0); // Const 0
600  SDValue Sub_1 = Shl_0.getOperand(1); // Val
601  if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(Sub_0)) {
602  if (C1->getSExtValue() != 0 || Sub_1.getOpcode() != ISD::SHL)
603  return Default();
604  SDValue Shl2_0 = Sub_1.getOperand(0); // Val
605  SDValue Shl2_1 = Sub_1.getOperand(1); // Const
606  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(Shl2_1)) {
607  int32_t ValConst = 1 << (ShlConst + C2->getSExtValue());
608  if (isInt<9>(-ValConst)) {
609  SDValue Val = CurDAG->getTargetConstant(-ValConst, dl, MVT::i32);
610  SDNode *Result = CurDAG->getMachineNode(Hexagon::M2_mpysmi, dl,
611  MVT::i32, Shl2_0, Val);
612  ReplaceNode(N, Result);
613  return;
614  }
615  }
616  }
617  }
618 
619  return Default();
620 }
621 
622 //
623 // Handling intrinsics for circular load and bitreverse load.
624 //
629  return;
630  }
631 
632  // Handle bit-reverse load intrinsics.
633  if (SelectBrevLdIntrinsic(N))
634  return;
635 
636  if (SelectNewCircIntrinsic(N))
637  return;
638 
639  unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
640  if (IntNo == Intrinsic::hexagon_V6_vgathermw ||
641  IntNo == Intrinsic::hexagon_V6_vgathermw_128B ||
642  IntNo == Intrinsic::hexagon_V6_vgathermh ||
643  IntNo == Intrinsic::hexagon_V6_vgathermh_128B ||
644  IntNo == Intrinsic::hexagon_V6_vgathermhw ||
645  IntNo == Intrinsic::hexagon_V6_vgathermhw_128B) {
646  SelectV65Gather(N);
647  return;
648  }
649  if (IntNo == Intrinsic::hexagon_V6_vgathermwq ||
650  IntNo == Intrinsic::hexagon_V6_vgathermwq_128B ||
651  IntNo == Intrinsic::hexagon_V6_vgathermhq ||
652  IntNo == Intrinsic::hexagon_V6_vgathermhq_128B ||
653  IntNo == Intrinsic::hexagon_V6_vgathermhwq ||
654  IntNo == Intrinsic::hexagon_V6_vgathermhwq_128B) {
656  return;
657  }
658 
659  SelectCode(N);
660 }
661 
663  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
664  unsigned Bits;
665  switch (IID) {
666  case Intrinsic::hexagon_S2_vsplatrb:
667  Bits = 8;
668  break;
669  case Intrinsic::hexagon_S2_vsplatrh:
670  Bits = 16;
671  break;
672  case Intrinsic::hexagon_V6_vaddcarry:
673  case Intrinsic::hexagon_V6_vaddcarry_128B:
674  case Intrinsic::hexagon_V6_vsubcarry:
675  case Intrinsic::hexagon_V6_vsubcarry_128B:
677  return;
678  default:
679  SelectCode(N);
680  return;
681  }
682 
683  SDValue V = N->getOperand(1);
684  SDValue U;
685  if (keepsLowBits(V, Bits, U)) {
686  SDValue R = CurDAG->getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
687  N->getOperand(0), U);
688  ReplaceNode(N, R.getNode());
689  SelectCode(R.getNode());
690  return;
691  }
692  SelectCode(N);
693 }
694 
695 //
696 // Map floating point constant values.
697 //
699  SDLoc dl(N);
701  APInt A = CN->getValueAPF().bitcastToAPInt();
702  if (N->getValueType(0) == MVT::f32) {
703  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i32);
704  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::f32, V));
705  return;
706  }
707  if (N->getValueType(0) == MVT::f64) {
708  SDValue V = CurDAG->getTargetConstant(A.getZExtValue(), dl, MVT::i64);
709  ReplaceNode(N, CurDAG->getMachineNode(Hexagon::CONST64, dl, MVT::f64, V));
710  return;
711  }
712 
713  SelectCode(N);
714 }
715 
716 //
717 // Map boolean values.
718 //
720  if (N->getValueType(0) == MVT::i1) {
721  assert(!(cast<ConstantSDNode>(N)->getZExtValue() >> 1));
722  unsigned Opc = (cast<ConstantSDNode>(N)->getSExtValue() != 0)
723  ? Hexagon::PS_true
724  : Hexagon::PS_false;
726  return;
727  }
728 
729  SelectCode(N);
730 }
731 
733  MachineFrameInfo &MFI = MF->getFrameInfo();
734  const HexagonFrameLowering *HFI = HST->getFrameLowering();
735  int FX = cast<FrameIndexSDNode>(N)->getIndex();
736  unsigned StkA = HFI->getStackAlignment();
737  unsigned MaxA = MFI.getMaxAlignment();
739  SDLoc DL(N);
740  SDValue Zero = CurDAG->getTargetConstant(0, DL, MVT::i32);
741  SDNode *R = nullptr;
742 
743  // Use PS_fi when:
744  // - the object is fixed, or
745  // - there are no objects with higher-than-default alignment, or
746  // - there are no dynamically allocated objects.
747  // Otherwise, use PS_fia.
748  if (FX < 0 || MaxA <= StkA || !MFI.hasVarSizedObjects()) {
749  R = CurDAG->getMachineNode(Hexagon::PS_fi, DL, MVT::i32, FI, Zero);
750  } else {
751  auto &HMFI = *MF->getInfo<HexagonMachineFunctionInfo>();
752  unsigned AR = HMFI.getStackAlignBaseVReg();
753  SDValue CH = CurDAG->getEntryNode();
754  SDValue Ops[] = { CurDAG->getCopyFromReg(CH, DL, AR, MVT::i32), FI, Zero };
755  R = CurDAG->getMachineNode(Hexagon::PS_fia, DL, MVT::i32, Ops);
756  }
757 
758  ReplaceNode(N, R);
759 }
760 
762  unsigned OpcCarry = N->getOpcode() == HexagonISD::ADDC ? Hexagon::A4_addp_c
763  : Hexagon::A4_subp_c;
764  SDNode *C = CurDAG->getMachineNode(OpcCarry, SDLoc(N), N->getVTList(),
765  { N->getOperand(0), N->getOperand(1),
766  N->getOperand(2) });
767  ReplaceNode(N, C);
768 }
769 
771  MVT ResTy = N->getValueType(0).getSimpleVT();
772  if (HST->isHVXVectorType(ResTy, true))
773  return SelectHvxVAlign(N);
774 
775  const SDLoc &dl(N);
776  unsigned VecLen = ResTy.getSizeInBits();
777  if (VecLen == 32) {
778  SDValue Ops[] = {
779  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
780  N->getOperand(0),
781  CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
782  N->getOperand(1),
783  CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
784  };
785  SDNode *R = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
786  MVT::i64, Ops);
787 
788  // Shift right by "(Addr & 0x3) * 8" bytes.
789  SDValue M0 = CurDAG->getTargetConstant(0x18, dl, MVT::i32);
790  SDValue M1 = CurDAG->getTargetConstant(0x03, dl, MVT::i32);
791  SDNode *C = CurDAG->getMachineNode(Hexagon::S4_andi_asl_ri, dl, MVT::i32,
792  M0, N->getOperand(2), M1);
793  SDNode *S = CurDAG->getMachineNode(Hexagon::S2_lsr_r_p, dl, MVT::i64,
794  SDValue(R, 0), SDValue(C, 0));
795  SDValue E = CurDAG->getTargetExtractSubreg(Hexagon::isub_lo, dl, ResTy,
796  SDValue(S, 0));
797  ReplaceNode(N, E.getNode());
798  } else {
799  assert(VecLen == 64);
800  SDNode *Pu = CurDAG->getMachineNode(Hexagon::C2_tfrrp, dl, MVT::v8i1,
801  N->getOperand(2));
802  SDNode *VA = CurDAG->getMachineNode(Hexagon::S2_valignrb, dl, ResTy,
803  N->getOperand(0), N->getOperand(1),
804  SDValue(Pu,0));
805  ReplaceNode(N, VA);
806  }
807 }
808 
810  const SDLoc &dl(N);
811  SDValue A = N->getOperand(1);
812  int Mask = -cast<ConstantSDNode>(A.getNode())->getSExtValue();
813  assert(isPowerOf2_32(-Mask));
814 
815  SDValue M = CurDAG->getTargetConstant(Mask, dl, MVT::i32);
816  SDNode *AA = CurDAG->getMachineNode(Hexagon::A2_andir, dl, MVT::i32,
817  N->getOperand(0), M);
818  ReplaceNode(N, AA);
819 }
820 
821 // Handle these nodes here to avoid having to write patterns for all
822 // combinations of input/output types. In all cases, the resulting
823 // instruction is the same.
825  SDValue Op = N->getOperand(0);
826  MVT OpTy = Op.getValueType().getSimpleVT();
827  SDNode *T = CurDAG->MorphNodeTo(N, N->getOpcode(),
828  CurDAG->getVTList(OpTy), {Op});
829  ReplaceNode(T, Op.getNode());
830 }
831 
833  MVT ResTy = N->getValueType(0).getSimpleVT();
834  SDNode *T = CurDAG->getMachineNode(Hexagon::C2_mask, SDLoc(N), ResTy,
835  N->getOperand(0));
836  ReplaceNode(N, T);
837 }
838 
840  const SDLoc &dl(N);
841  MVT ResTy = N->getValueType(0).getSimpleVT();
842  SDValue Zero = CurDAG->getTargetConstant(0, dl, MVT::i32);
843  SDNode *T = CurDAG->getMachineNode(Hexagon::A4_vcmpbgtui, dl, ResTy,
844  N->getOperand(0), Zero);
845  ReplaceNode(N, T);
846 }
847 
849  const SDLoc &dl(N);
850  MVT ResTy = N->getValueType(0).getSimpleVT();
851 
853  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
854  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandvrt, dl, ResTy,
855  N->getOperand(0), SDValue(R,0));
856  ReplaceNode(N, T);
857 }
858 
860  const SDLoc &dl(N);
861  MVT ResTy = N->getValueType(0).getSimpleVT();
862 
864  SDNode *R = CurDAG->getMachineNode(Hexagon::A2_tfrsi, dl, MVT::i32, C);
865  SDNode *T = CurDAG->getMachineNode(Hexagon::V6_vandqrt, dl, ResTy,
866  N->getOperand(0), SDValue(R,0));
867  ReplaceNode(N, T);
868 }
869 
871  if (N->isMachineOpcode())
872  return N->setNodeId(-1); // Already selected.
873 
874  switch (N->getOpcode()) {
875  case ISD::Constant: return SelectConstant(N);
876  case ISD::ConstantFP: return SelectConstantFP(N);
877  case ISD::FrameIndex: return SelectFrameIndex(N);
878  case ISD::SHL: return SelectSHL(N);
879  case ISD::LOAD: return SelectLoad(N);
880  case ISD::STORE: return SelectStore(N);
883 
884  case HexagonISD::ADDC:
885  case HexagonISD::SUBC: return SelectAddSubCarry(N);
886  case HexagonISD::VALIGN: return SelectVAlign(N);
888  case HexagonISD::TYPECAST: return SelectTypecast(N);
889  case HexagonISD::P2D: return SelectP2D(N);
890  case HexagonISD::D2P: return SelectD2P(N);
891  case HexagonISD::Q2V: return SelectQ2V(N);
892  case HexagonISD::V2Q: return SelectV2Q(N);
893  }
894 
895  if (HST->useHVXOps()) {
896  switch (N->getOpcode()) {
897  case ISD::VECTOR_SHUFFLE: return SelectHvxShuffle(N);
898  case HexagonISD::VROR: return SelectHvxRor(N);
899  }
900  }
901 
902  SelectCode(N);
903 }
904 
906 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
907  std::vector<SDValue> &OutOps) {
908  SDValue Inp = Op, Res;
909 
910  switch (ConstraintID) {
911  default:
912  return true;
914  case InlineAsm::Constraint_o: // Offsetable.
915  case InlineAsm::Constraint_v: // Not offsetable.
916  case InlineAsm::Constraint_m: // Memory.
917  if (SelectAddrFI(Inp, Res))
918  OutOps.push_back(Res);
919  else
920  OutOps.push_back(Inp);
921  break;
922  }
923 
924  OutOps.push_back(CurDAG->getTargetConstant(0, SDLoc(Op), MVT::i32));
925  return false;
926 }
927 
928 
929 static bool isMemOPCandidate(SDNode *I, SDNode *U) {
930  // I is an operand of U. Check if U is an arithmetic (binary) operation
931  // usable in a memop, where the other operand is a loaded value, and the
932  // result of U is stored in the same location.
933 
934  if (!U->hasOneUse())
935  return false;
936  unsigned Opc = U->getOpcode();
937  switch (Opc) {
938  case ISD::ADD:
939  case ISD::SUB:
940  case ISD::AND:
941  case ISD::OR:
942  break;
943  default:
944  return false;
945  }
946 
947  SDValue S0 = U->getOperand(0);
948  SDValue S1 = U->getOperand(1);
949  SDValue SY = (S0.getNode() == I) ? S1 : S0;
950 
951  SDNode *UUse = *U->use_begin();
952  if (UUse->getNumValues() != 1)
953  return false;
954 
955  // Check if one of the inputs to U is a load instruction and the output
956  // is used by a store instruction. If so and they also have the same
957  // base pointer, then don't preoprocess this node sequence as it
958  // can be matched to a memop.
959  SDNode *SYNode = SY.getNode();
960  if (UUse->getOpcode() == ISD::STORE && SYNode->getOpcode() == ISD::LOAD) {
961  SDValue LDBasePtr = cast<MemSDNode>(SYNode)->getBasePtr();
962  SDValue STBasePtr = cast<MemSDNode>(UUse)->getBasePtr();
963  if (LDBasePtr == STBasePtr)
964  return true;
965  }
966  return false;
967 }
968 
969 
970 // Transform: (or (select c x 0) z) -> (select c (or x z) z)
971 // (or (select c 0 y) z) -> (select c z (or y z))
972 void HexagonDAGToDAGISel::ppSimplifyOrSelect0(std::vector<SDNode*> &&Nodes) {
973  SelectionDAG &DAG = *CurDAG;
974 
975  for (auto I : Nodes) {
976  if (I->getOpcode() != ISD::OR)
977  continue;
978 
979  auto IsZero = [] (const SDValue &V) -> bool {
980  if (ConstantSDNode *SC = dyn_cast<ConstantSDNode>(V.getNode()))
981  return SC->isNullValue();
982  return false;
983  };
984  auto IsSelect0 = [IsZero] (const SDValue &Op) -> bool {
985  if (Op.getOpcode() != ISD::SELECT)
986  return false;
987  return IsZero(Op.getOperand(1)) || IsZero(Op.getOperand(2));
988  };
989 
990  SDValue N0 = I->getOperand(0), N1 = I->getOperand(1);
991  EVT VT = I->getValueType(0);
992  bool SelN0 = IsSelect0(N0);
993  SDValue SOp = SelN0 ? N0 : N1;
994  SDValue VOp = SelN0 ? N1 : N0;
995 
996  if (SOp.getOpcode() == ISD::SELECT && SOp.getNode()->hasOneUse()) {
997  SDValue SC = SOp.getOperand(0);
998  SDValue SX = SOp.getOperand(1);
999  SDValue SY = SOp.getOperand(2);
1000  SDLoc DLS = SOp;
1001  if (IsZero(SY)) {
1002  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SX, VOp);
1003  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, NewOr, VOp);
1004  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1005  } else if (IsZero(SX)) {
1006  SDValue NewOr = DAG.getNode(ISD::OR, DLS, VT, SY, VOp);
1007  SDValue NewSel = DAG.getNode(ISD::SELECT, DLS, VT, SC, VOp, NewOr);
1008  DAG.ReplaceAllUsesWith(I, NewSel.getNode());
1009  }
1010  }
1011  }
1012 }
1013 
1014 // Transform: (store ch val (add x (add (shl y c) e)))
1015 // to: (store ch val (add x (shl (add y d) c))),
1016 // where e = (shl d c) for some integer d.
1017 // The purpose of this is to enable generation of loads/stores with
1018 // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1019 // value c must be 0, 1 or 2.
1020 void HexagonDAGToDAGISel::ppAddrReorderAddShl(std::vector<SDNode*> &&Nodes) {
1021  SelectionDAG &DAG = *CurDAG;
1022 
1023  for (auto I : Nodes) {
1024  if (I->getOpcode() != ISD::STORE)
1025  continue;
1026 
1027  // I matched: (store ch val Off)
1028  SDValue Off = I->getOperand(2);
1029  // Off needs to match: (add x (add (shl y c) (shl d c))))
1030  if (Off.getOpcode() != ISD::ADD)
1031  continue;
1032  // Off matched: (add x T0)
1033  SDValue T0 = Off.getOperand(1);
1034  // T0 needs to match: (add T1 T2):
1035  if (T0.getOpcode() != ISD::ADD)
1036  continue;
1037  // T0 matched: (add T1 T2)
1038  SDValue T1 = T0.getOperand(0);
1039  SDValue T2 = T0.getOperand(1);
1040  // T1 needs to match: (shl y c)
1041  if (T1.getOpcode() != ISD::SHL)
1042  continue;
1043  SDValue C = T1.getOperand(1);
1045  if (CN == nullptr)
1046  continue;
1047  unsigned CV = CN->getZExtValue();
1048  if (CV > 2)
1049  continue;
1050  // T2 needs to match e, where e = (shl d c) for some d.
1052  if (EN == nullptr)
1053  continue;
1054  unsigned EV = EN->getZExtValue();
1055  if (EV % (1 << CV) != 0)
1056  continue;
1057  unsigned DV = EV / (1 << CV);
1058 
1059  // Replace T0 with: (shl (add y d) c)
1060  SDLoc DL = SDLoc(I);
1061  EVT VT = T0.getValueType();
1062  SDValue D = DAG.getConstant(DV, DL, VT);
1063  // NewAdd = (add y d)
1064  SDValue NewAdd = DAG.getNode(ISD::ADD, DL, VT, T1.getOperand(0), D);
1065  // NewShl = (shl NewAdd c)
1066  SDValue NewShl = DAG.getNode(ISD::SHL, DL, VT, NewAdd, C);
1067  ReplaceNode(T0.getNode(), NewShl.getNode());
1068  }
1069 }
1070 
1071 // Transform: (load ch (add x (and (srl y c) Mask)))
1072 // to: (load ch (add x (shl (srl y d) d-c)))
1073 // where
1074 // Mask = 00..0 111..1 0.0
1075 // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1076 // | +-------- 1s
1077 // +-------------- at most c 0s
1078 // Motivating example:
1079 // DAG combiner optimizes (add x (shl (srl y 5) 2))
1080 // to (add x (and (srl y 3) 1FFFFFFC))
1081 // which results in a constant-extended and(##...,lsr). This transformation
1082 // undoes this simplification for cases where the shl can be folded into
1083 // an addressing mode.
1084 void HexagonDAGToDAGISel::ppAddrRewriteAndSrl(std::vector<SDNode*> &&Nodes) {
1085  SelectionDAG &DAG = *CurDAG;
1086 
1087  for (SDNode *N : Nodes) {
1088  unsigned Opc = N->getOpcode();
1089  if (Opc != ISD::LOAD && Opc != ISD::STORE)
1090  continue;
1091  SDValue Addr = Opc == ISD::LOAD ? N->getOperand(1) : N->getOperand(2);
1092  // Addr must match: (add x T0)
1093  if (Addr.getOpcode() != ISD::ADD)
1094  continue;
1095  SDValue T0 = Addr.getOperand(1);
1096  // T0 must match: (and T1 Mask)
1097  if (T0.getOpcode() != ISD::AND)
1098  continue;
1099 
1100  // We have an AND.
1101  //
1102  // Check the first operand. It must be: (srl y c).
1103  SDValue S = T0.getOperand(0);
1104  if (S.getOpcode() != ISD::SRL)
1105  continue;
1107  if (SN == nullptr)
1108  continue;
1109  if (SN->getAPIntValue().getBitWidth() != 32)
1110  continue;
1111  uint32_t CV = SN->getZExtValue();
1112 
1113  // Check the second operand: the supposed mask.
1115  if (MN == nullptr)
1116  continue;
1117  if (MN->getAPIntValue().getBitWidth() != 32)
1118  continue;
1119  uint32_t Mask = MN->getZExtValue();
1120  // Examine the mask.
1121  uint32_t TZ = countTrailingZeros(Mask);
1122  uint32_t M1 = countTrailingOnes(Mask >> TZ);
1123  uint32_t LZ = countLeadingZeros(Mask);
1124  // Trailing zeros + middle ones + leading zeros must equal the width.
1125  if (TZ + M1 + LZ != 32)
1126  continue;
1127  // The number of trailing zeros will be encoded in the addressing mode.
1128  if (TZ > 2)
1129  continue;
1130  // The number of leading zeros must be at most c.
1131  if (LZ > CV)
1132  continue;
1133 
1134  // All looks good.
1135  SDValue Y = S.getOperand(0);
1136  EVT VT = Addr.getValueType();
1137  SDLoc dl(S);
1138  // TZ = D-C, so D = TZ+C.
1139  SDValue D = DAG.getConstant(TZ+CV, dl, VT);
1140  SDValue DC = DAG.getConstant(TZ, dl, VT);
1141  SDValue NewSrl = DAG.getNode(ISD::SRL, dl, VT, Y, D);
1142  SDValue NewShl = DAG.getNode(ISD::SHL, dl, VT, NewSrl, DC);
1143  ReplaceNode(T0.getNode(), NewShl.getNode());
1144  }
1145 }
1146 
1147 // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1148 // (op ... 1 ...))
1149 void HexagonDAGToDAGISel::ppHoistZextI1(std::vector<SDNode*> &&Nodes) {
1150  SelectionDAG &DAG = *CurDAG;
1151 
1152  for (SDNode *N : Nodes) {
1153  unsigned Opc = N->getOpcode();
1154  if (Opc != ISD::ZERO_EXTEND)
1155  continue;
1156  SDValue OpI1 = N->getOperand(0);
1157  EVT OpVT = OpI1.getValueType();
1158  if (!OpVT.isSimple() || OpVT.getSimpleVT() != MVT::i1)
1159  continue;
1160  for (auto I = N->use_begin(), E = N->use_end(); I != E; ++I) {
1161  SDNode *U = *I;
1162  if (U->getNumValues() != 1)
1163  continue;
1164  EVT UVT = U->getValueType(0);
1165  if (!UVT.isSimple() || !UVT.isInteger() || UVT.getSimpleVT() == MVT::i1)
1166  continue;
1167  if (isMemOPCandidate(N, U))
1168  continue;
1169 
1170  // Potentially simplifiable operation.
1171  unsigned I1N = I.getOperandNo();
1173  for (unsigned i = 0, n = U->getNumOperands(); i != n; ++i)
1174  Ops[i] = U->getOperand(i);
1175  EVT BVT = Ops[I1N].getValueType();
1176 
1177  SDLoc dl(U);
1178  SDValue C0 = DAG.getConstant(0, dl, BVT);
1179  SDValue C1 = DAG.getConstant(1, dl, BVT);
1180  SDValue If0, If1;
1181 
1182  if (isa<MachineSDNode>(U)) {
1183  unsigned UseOpc = U->getMachineOpcode();
1184  Ops[I1N] = C0;
1185  If0 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1186  Ops[I1N] = C1;
1187  If1 = SDValue(DAG.getMachineNode(UseOpc, dl, UVT, Ops), 0);
1188  } else {
1189  unsigned UseOpc = U->getOpcode();
1190  Ops[I1N] = C0;
1191  If0 = DAG.getNode(UseOpc, dl, UVT, Ops);
1192  Ops[I1N] = C1;
1193  If1 = DAG.getNode(UseOpc, dl, UVT, Ops);
1194  }
1195  SDValue Sel = DAG.getNode(ISD::SELECT, dl, UVT, OpI1, If1, If0);
1196  DAG.ReplaceAllUsesWith(U, Sel.getNode());
1197  }
1198  }
1199 }
1200 
1202  // Repack all nodes before calling each preprocessing function,
1203  // because each of them can modify the set of nodes.
1204  auto getNodes = [this] () -> std::vector<SDNode*> {
1205  std::vector<SDNode*> T;
1206  T.reserve(CurDAG->allnodes_size());
1207  for (SDNode &N : CurDAG->allnodes())
1208  T.push_back(&N);
1209  return T;
1210  };
1211 
1212  // Transform: (or (select c x 0) z) -> (select c (or x z) z)
1213  // (or (select c 0 y) z) -> (select c z (or y z))
1214  ppSimplifyOrSelect0(getNodes());
1215 
1216  // Transform: (store ch val (add x (add (shl y c) e)))
1217  // to: (store ch val (add x (shl (add y d) c))),
1218  // where e = (shl d c) for some integer d.
1219  // The purpose of this is to enable generation of loads/stores with
1220  // shifted addressing mode, i.e. mem(x+y<<#c). For that, the shift
1221  // value c must be 0, 1 or 2.
1222  ppAddrReorderAddShl(getNodes());
1223 
1224  // Transform: (load ch (add x (and (srl y c) Mask)))
1225  // to: (load ch (add x (shl (srl y d) d-c)))
1226  // where
1227  // Mask = 00..0 111..1 0.0
1228  // | | +-- d-c 0s, and d-c is 0, 1 or 2.
1229  // | +-------- 1s
1230  // +-------------- at most c 0s
1231  // Motivating example:
1232  // DAG combiner optimizes (add x (shl (srl y 5) 2))
1233  // to (add x (and (srl y 3) 1FFFFFFC))
1234  // which results in a constant-extended and(##...,lsr). This transformation
1235  // undoes this simplification for cases where the shl can be folded into
1236  // an addressing mode.
1237  ppAddrRewriteAndSrl(getNodes());
1238 
1239  // Transform: (op ... (zext i1 c) ...) -> (select c (op ... 0 ...)
1240  // (op ... 1 ...))
1241  ppHoistZextI1(getNodes());
1242 
1243  DEBUG_WITH_TYPE("isel", {
1244  dbgs() << "Preprocessed (Hexagon) selection DAG:";
1245  CurDAG->dump();
1246  });
1247 
1249  rebalanceAddressTrees();
1250 
1251  DEBUG_WITH_TYPE("isel", {
1252  dbgs() << "Address tree balanced selection DAG:";
1253  CurDAG->dump();
1254  });
1255  }
1256 }
1257 
1259  auto &HST = static_cast<const HexagonSubtarget&>(MF->getSubtarget());
1260  auto &HFI = *HST.getFrameLowering();
1261  if (!HFI.needsAligna(*MF))
1262  return;
1263 
1264  MachineFrameInfo &MFI = MF->getFrameInfo();
1265  MachineBasicBlock *EntryBB = &MF->front();
1266  unsigned AR = FuncInfo->CreateReg(MVT::i32);
1267  unsigned MaxA = MFI.getMaxAlignment();
1268  BuildMI(EntryBB, DebugLoc(), HII->get(Hexagon::PS_aligna), AR)
1269  .addImm(MaxA);
1270  MF->getInfo<HexagonMachineFunctionInfo>()->setStackAlignBaseVReg(AR);
1271 }
1272 
1273 // Match a frame index that can be used in an addressing mode.
1275  if (N.getOpcode() != ISD::FrameIndex)
1276  return false;
1277  auto &HFI = *HST->getFrameLowering();
1278  MachineFrameInfo &MFI = MF->getFrameInfo();
1279  int FX = cast<FrameIndexSDNode>(N)->getIndex();
1280  if (!MFI.isFixedObjectIndex(FX) && HFI.needsAligna(*MF))
1281  return false;
1283  return true;
1284 }
1285 
1287  return SelectGlobalAddress(N, R, false, 0);
1288 }
1289 
1291  return SelectGlobalAddress(N, R, true, 0);
1292 }
1293 
1295  return SelectAnyImmediate(N, R, 0);
1296 }
1297 
1299  return SelectAnyImmediate(N, R, 0);
1300 }
1302  return SelectAnyImmediate(N, R, 1);
1303 }
1305  return SelectAnyImmediate(N, R, 2);
1306 }
1308  return SelectAnyImmediate(N, R, 3);
1309 }
1310 
1312  EVT T = N.getValueType();
1313  if (!T.isInteger() || T.getSizeInBits() != 32 || !isa<ConstantSDNode>(N))
1314  return false;
1315  R = N;
1316  return true;
1317 }
1318 
1320  uint32_t LogAlign) {
1321  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1322  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1323  };
1324 
1325  switch (N.getOpcode()) {
1326  case ISD::Constant: {
1327  if (N.getValueType() != MVT::i32)
1328  return false;
1329  int32_t V = cast<const ConstantSDNode>(N)->getZExtValue();
1330  if (!IsAligned(V))
1331  return false;
1332  R = CurDAG->getTargetConstant(V, SDLoc(N), N.getValueType());
1333  return true;
1334  }
1335  case HexagonISD::JT:
1336  case HexagonISD::CP:
1337  // These are assumed to always be aligned at least 8-byte boundary.
1338  if (LogAlign > 3)
1339  return false;
1340  R = N.getOperand(0);
1341  return true;
1342  case ISD::ExternalSymbol:
1343  // Symbols may be aligned at any boundary.
1344  if (LogAlign > 0)
1345  return false;
1346  R = N;
1347  return true;
1348  case ISD::BlockAddress:
1349  // Block address is always aligned at least 4-byte boundary.
1350  if (LogAlign > 2 || !IsAligned(cast<BlockAddressSDNode>(N)->getOffset()))
1351  return false;
1352  R = N;
1353  return true;
1354  }
1355 
1356  if (SelectGlobalAddress(N, R, false, LogAlign) ||
1357  SelectGlobalAddress(N, R, true, LogAlign))
1358  return true;
1359 
1360  return false;
1361 }
1362 
1364  bool UseGP, uint32_t LogAlign) {
1365  auto IsAligned = [LogAlign] (uint64_t V) -> bool {
1366  return alignTo(V, (uint64_t)1 << LogAlign) == V;
1367  };
1368 
1369  switch (N.getOpcode()) {
1370  case ISD::ADD: {
1371  SDValue N0 = N.getOperand(0);
1372  SDValue N1 = N.getOperand(1);
1373  unsigned GAOpc = N0.getOpcode();
1374  if (UseGP && GAOpc != HexagonISD::CONST32_GP)
1375  return false;
1376  if (!UseGP && GAOpc != HexagonISD::CONST32)
1377  return false;
1378  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N1)) {
1379  SDValue Addr = N0.getOperand(0);
1380  // For the purpose of alignment, sextvalue and zextvalue are the same.
1381  if (!IsAligned(Const->getZExtValue()))
1382  return false;
1383  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Addr)) {
1384  if (GA->getOpcode() == ISD::TargetGlobalAddress) {
1385  uint64_t NewOff = GA->getOffset() + (uint64_t)Const->getSExtValue();
1386  R = CurDAG->getTargetGlobalAddress(GA->getGlobal(), SDLoc(Const),
1387  N.getValueType(), NewOff);
1388  return true;
1389  }
1390  }
1391  }
1392  break;
1393  }
1394  case HexagonISD::CP:
1395  case HexagonISD::JT:
1396  case HexagonISD::CONST32:
1397  // The operand(0) of CONST32 is TargetGlobalAddress, which is what we
1398  // want in the instruction.
1399  if (!UseGP)
1400  R = N.getOperand(0);
1401  return !UseGP;
1403  if (UseGP)
1404  R = N.getOperand(0);
1405  return UseGP;
1406  default:
1407  return false;
1408  }
1409 
1410  return false;
1411 }
1412 
1414  // This (complex pattern) function is meant to detect a sign-extension
1415  // i32->i64 on a per-operand basis. This would allow writing single
1416  // patterns that would cover a number of combinations of different ways
1417  // a sign-extensions could be written. For example:
1418  // (mul (DetectUseSxtw x) (DetectUseSxtw y)) -> (M2_dpmpyss_s0 x y)
1419  // could match either one of these:
1420  // (mul (sext x) (sext_inreg y))
1421  // (mul (sext-load *p) (sext_inreg y))
1422  // (mul (sext_inreg x) (sext y))
1423  // etc.
1424  //
1425  // The returned value will have type i64 and its low word will
1426  // contain the value being extended. The high bits are not specified.
1427  // The returned type is i64 because the original type of N was i64,
1428  // but the users of this function should only use the low-word of the
1429  // result, e.g.
1430  // (mul sxtw:x, sxtw:y) -> (M2_dpmpyss_s0 (LoReg sxtw:x), (LoReg sxtw:y))
1431 
1432  if (N.getValueType() != MVT::i64)
1433  return false;
1434  unsigned Opc = N.getOpcode();
1435  switch (Opc) {
1436  case ISD::SIGN_EXTEND:
1437  case ISD::SIGN_EXTEND_INREG: {
1438  // sext_inreg has the source type as a separate operand.
1439  EVT T = Opc == ISD::SIGN_EXTEND
1440  ? N.getOperand(0).getValueType()
1441  : cast<VTSDNode>(N.getOperand(1))->getVT();
1442  unsigned SW = T.getSizeInBits();
1443  if (SW == 32)
1444  R = N.getOperand(0);
1445  else if (SW < 32)
1446  R = N;
1447  else
1448  return false;
1449  break;
1450  }
1451  case ISD::LOAD: {
1452  LoadSDNode *L = cast<LoadSDNode>(N);
1453  if (L->getExtensionType() != ISD::SEXTLOAD)
1454  return false;
1455  // All extending loads extend to i32, so even if the value in
1456  // memory is shorter than 32 bits, it will be i32 after the load.
1457  if (L->getMemoryVT().getSizeInBits() > 32)
1458  return false;
1459  R = N;
1460  break;
1461  }
1462  case ISD::SRA: {
1463  auto *S = dyn_cast<ConstantSDNode>(N.getOperand(1));
1464  if (!S || S->getZExtValue() != 32)
1465  return false;
1466  R = N;
1467  break;
1468  }
1469  default:
1470  return false;
1471  }
1472  EVT RT = R.getValueType();
1473  if (RT == MVT::i64)
1474  return true;
1475  assert(RT == MVT::i32);
1476  // This is only to produce a value of type i64. Do not rely on the
1477  // high bits produced by this.
1478  const SDLoc &dl(N);
1479  SDValue Ops[] = {
1480  CurDAG->getTargetConstant(Hexagon::DoubleRegsRegClassID, dl, MVT::i32),
1481  R, CurDAG->getTargetConstant(Hexagon::isub_hi, dl, MVT::i32),
1482  R, CurDAG->getTargetConstant(Hexagon::isub_lo, dl, MVT::i32)
1483  };
1484  SDNode *T = CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl,
1485  MVT::i64, Ops);
1486  R = SDValue(T, 0);
1487  return true;
1488 }
1489 
1490 bool HexagonDAGToDAGISel::keepsLowBits(const SDValue &Val, unsigned NumBits,
1491  SDValue &Src) {
1492  unsigned Opc = Val.getOpcode();
1493  switch (Opc) {
1494  case ISD::SIGN_EXTEND:
1495  case ISD::ZERO_EXTEND:
1496  case ISD::ANY_EXTEND: {
1497  const SDValue &Op0 = Val.getOperand(0);
1498  EVT T = Op0.getValueType();
1499  if (T.isInteger() && T.getSizeInBits() == NumBits) {
1500  Src = Op0;
1501  return true;
1502  }
1503  break;
1504  }
1506  case ISD::AssertSext:
1507  case ISD::AssertZext:
1508  if (Val.getOperand(0).getValueType().isInteger()) {
1509  VTSDNode *T = cast<VTSDNode>(Val.getOperand(1));
1510  if (T->getVT().getSizeInBits() == NumBits) {
1511  Src = Val.getOperand(0);
1512  return true;
1513  }
1514  }
1515  break;
1516  case ISD::AND: {
1517  // Check if this is an AND with NumBits of lower bits set to 1.
1518  uint64_t Mask = (1 << NumBits) - 1;
1519  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1520  if (C->getZExtValue() == Mask) {
1521  Src = Val.getOperand(1);
1522  return true;
1523  }
1524  }
1525  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1526  if (C->getZExtValue() == Mask) {
1527  Src = Val.getOperand(0);
1528  return true;
1529  }
1530  }
1531  break;
1532  }
1533  case ISD::OR:
1534  case ISD::XOR: {
1535  // OR/XOR with the lower NumBits bits set to 0.
1536  uint64_t Mask = (1 << NumBits) - 1;
1537  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(0))) {
1538  if ((C->getZExtValue() & Mask) == 0) {
1539  Src = Val.getOperand(1);
1540  return true;
1541  }
1542  }
1543  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val.getOperand(1))) {
1544  if ((C->getZExtValue() & Mask) == 0) {
1545  Src = Val.getOperand(0);
1546  return true;
1547  }
1548  }
1549  break;
1550  }
1551  default:
1552  break;
1553  }
1554  return false;
1555 }
1556 
1557 bool HexagonDAGToDAGISel::isAlignedMemNode(const MemSDNode *N) const {
1558  return N->getAlignment() >= N->getMemoryVT().getStoreSize();
1559 }
1560 
1561 bool HexagonDAGToDAGISel::isSmallStackStore(const StoreSDNode *N) const {
1562  unsigned StackSize = MF->getFrameInfo().estimateStackSize(*MF);
1563  switch (N->getMemoryVT().getStoreSize()) {
1564  case 1:
1565  return StackSize <= 56; // 1*2^6 - 8
1566  case 2:
1567  return StackSize <= 120; // 2*2^6 - 8
1568  case 4:
1569  return StackSize <= 248; // 4*2^6 - 8
1570  default:
1571  return false;
1572  }
1573 }
1574 
1575 // Return true when the given node fits in a positive half word.
1576 bool HexagonDAGToDAGISel::isPositiveHalfWord(const SDNode *N) const {
1577  if (const ConstantSDNode *CN = dyn_cast<const ConstantSDNode>(N)) {
1578  int64_t V = CN->getSExtValue();
1579  return V > 0 && isInt<16>(V);
1580  }
1581  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
1582  const VTSDNode *VN = dyn_cast<const VTSDNode>(N->getOperand(1));
1583  return VN->getVT().getSizeInBits() <= 16;
1584  }
1585  return false;
1586 }
1587 
1588 bool HexagonDAGToDAGISel::hasOneUse(const SDNode *N) const {
1589  return !CheckSingleUse || N->hasOneUse();
1590 }
1591 
1592 ////////////////////////////////////////////////////////////////////////////////
1593 // Rebalancing of address calculation trees
1594 
1595 static bool isOpcodeHandled(const SDNode *N) {
1596  switch (N->getOpcode()) {
1597  case ISD::ADD:
1598  case ISD::MUL:
1599  return true;
1600  case ISD::SHL:
1601  // We only handle constant shifts because these can be easily flattened
1602  // into multiplications by 2^Op1.
1603  return isa<ConstantSDNode>(N->getOperand(1).getNode());
1604  default:
1605  return false;
1606  }
1607 }
1608 
1609 /// Return the weight of an SDNode
1610 int HexagonDAGToDAGISel::getWeight(SDNode *N) {
1611  if (!isOpcodeHandled(N))
1612  return 1;
1613  assert(RootWeights.count(N) && "Cannot get weight of unseen root!");
1614  assert(RootWeights[N] != -1 && "Cannot get weight of unvisited root!");
1615  assert(RootWeights[N] != -2 && "Cannot get weight of RAWU'd root!");
1616  return RootWeights[N];
1617 }
1618 
1619 int HexagonDAGToDAGISel::getHeight(SDNode *N) {
1620  if (!isOpcodeHandled(N))
1621  return 0;
1622  assert(RootWeights.count(N) && RootWeights[N] >= 0 &&
1623  "Cannot query height of unvisited/RAUW'd node!");
1624  return RootHeights[N];
1625 }
1626 
1627 namespace {
1628 struct WeightedLeaf {
1629  SDValue Value;
1630  int Weight;
1631  int InsertionOrder;
1632 
1633  WeightedLeaf() : Value(SDValue()) { }
1634 
1635  WeightedLeaf(SDValue Value, int Weight, int InsertionOrder) :
1636  Value(Value), Weight(Weight), InsertionOrder(InsertionOrder) {
1637  assert(Weight >= 0 && "Weight must be >= 0");
1638  }
1639 
1640  static bool Compare(const WeightedLeaf &A, const WeightedLeaf &B) {
1641  assert(A.Value.getNode() && B.Value.getNode());
1642  return A.Weight == B.Weight ?
1643  (A.InsertionOrder > B.InsertionOrder) :
1644  (A.Weight > B.Weight);
1645  }
1646 };
1647 
1648 /// A specialized priority queue for WeigthedLeaves. It automatically folds
1649 /// constants and allows removal of non-top elements while maintaining the
1650 /// priority order.
1651 class LeafPrioQueue {
1653  bool HaveConst;
1654  WeightedLeaf ConstElt;
1655  unsigned Opcode;
1656 
1657 public:
1658  bool empty() {
1659  return (!HaveConst && Q.empty());
1660  }
1661 
1662  size_t size() {
1663  return Q.size() + HaveConst;
1664  }
1665 
1666  bool hasConst() {
1667  return HaveConst;
1668  }
1669 
1670  const WeightedLeaf &top() {
1671  if (HaveConst)
1672  return ConstElt;
1673  return Q.front();
1674  }
1675 
1676  WeightedLeaf pop() {
1677  if (HaveConst) {
1678  HaveConst = false;
1679  return ConstElt;
1680  }
1681  std::pop_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1682  return Q.pop_back_val();
1683  }
1684 
1685  void push(WeightedLeaf L, bool SeparateConst=true) {
1686  if (!HaveConst && SeparateConst && isa<ConstantSDNode>(L.Value)) {
1687  if (Opcode == ISD::MUL &&
1688  cast<ConstantSDNode>(L.Value)->getSExtValue() == 1)
1689  return;
1690  if (Opcode == ISD::ADD &&
1691  cast<ConstantSDNode>(L.Value)->getSExtValue() == 0)
1692  return;
1693 
1694  HaveConst = true;
1695  ConstElt = L;
1696  } else {
1697  Q.push_back(L);
1698  std::push_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1699  }
1700  }
1701 
1702  /// Push L to the bottom of the queue regardless of its weight. If L is
1703  /// constant, it will not be folded with other constants in the queue.
1704  void pushToBottom(WeightedLeaf L) {
1705  L.Weight = 1000;
1706  push(L, false);
1707  }
1708 
1709  /// Search for a SHL(x, [<=MaxAmount]) subtree in the queue, return the one of
1710  /// lowest weight and remove it from the queue.
1711  WeightedLeaf findSHL(uint64_t MaxAmount);
1712 
1713  WeightedLeaf findMULbyConst();
1714 
1715  LeafPrioQueue(unsigned Opcode) :
1716  HaveConst(false), Opcode(Opcode) { }
1717 };
1718 } // end anonymous namespace
1719 
1720 WeightedLeaf LeafPrioQueue::findSHL(uint64_t MaxAmount) {
1721  int ResultPos;
1722  WeightedLeaf Result;
1723 
1724  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1725  const WeightedLeaf &L = Q[Pos];
1726  const SDValue &Val = L.Value;
1727  if (Val.getOpcode() != ISD::SHL ||
1728  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1729  Val.getConstantOperandVal(1) > MaxAmount)
1730  continue;
1731  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1732  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1733  {
1734  Result = L;
1735  ResultPos = Pos;
1736  }
1737  }
1738 
1739  if (Result.Value.getNode()) {
1740  Q.erase(&Q[ResultPos]);
1741  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1742  }
1743 
1744  return Result;
1745 }
1746 
1747 WeightedLeaf LeafPrioQueue::findMULbyConst() {
1748  int ResultPos;
1749  WeightedLeaf Result;
1750 
1751  for (int Pos = 0, End = Q.size(); Pos != End; ++Pos) {
1752  const WeightedLeaf &L = Q[Pos];
1753  const SDValue &Val = L.Value;
1754  if (Val.getOpcode() != ISD::MUL ||
1755  !isa<ConstantSDNode>(Val.getOperand(1)) ||
1756  Val.getConstantOperandVal(1) > 127)
1757  continue;
1758  if (!Result.Value.getNode() || Result.Weight > L.Weight ||
1759  (Result.Weight == L.Weight && Result.InsertionOrder > L.InsertionOrder))
1760  {
1761  Result = L;
1762  ResultPos = Pos;
1763  }
1764  }
1765 
1766  if (Result.Value.getNode()) {
1767  Q.erase(&Q[ResultPos]);
1768  std::make_heap(Q.begin(), Q.end(), WeightedLeaf::Compare);
1769  }
1770 
1771  return Result;
1772 }
1773 
1774 SDValue HexagonDAGToDAGISel::getMultiplierForSHL(SDNode *N) {
1775  uint64_t MulFactor = 1ull << N->getConstantOperandVal(1);
1776  return CurDAG->getConstant(MulFactor, SDLoc(N),
1777  N->getOperand(1).getValueType());
1778 }
1779 
1780 /// @returns the value x for which 2^x is a factor of Val
1781 static unsigned getPowerOf2Factor(SDValue Val) {
1782  if (Val.getOpcode() == ISD::MUL) {
1783  unsigned MaxFactor = 0;
1784  for (int i = 0; i < 2; ++i) {
1786  if (!C)
1787  continue;
1788  const APInt &CInt = C->getAPIntValue();
1789  if (CInt.getBoolValue())
1790  MaxFactor = CInt.countTrailingZeros();
1791  }
1792  return MaxFactor;
1793  }
1794  if (Val.getOpcode() == ISD::SHL) {
1795  if (!isa<ConstantSDNode>(Val.getOperand(1).getNode()))
1796  return 0;
1797  return (unsigned) Val.getConstantOperandVal(1);
1798  }
1799 
1800  return 0;
1801 }
1802 
1803 /// @returns true if V>>Amount will eliminate V's operation on its child
1804 static bool willShiftRightEliminate(SDValue V, unsigned Amount) {
1805  if (V.getOpcode() == ISD::MUL) {
1806  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1807  for (int i = 0; i < 2; ++i)
1808  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1809  V.getConstantOperandVal(i) % (1ULL << Amount) == 0) {
1810  uint64_t NewConst = V.getConstantOperandVal(i) >> Amount;
1811  return (NewConst == 1);
1812  }
1813  } else if (V.getOpcode() == ISD::SHL) {
1814  return (Amount == V.getConstantOperandVal(1));
1815  }
1816 
1817  return false;
1818 }
1819 
1820 SDValue HexagonDAGToDAGISel::factorOutPowerOf2(SDValue V, unsigned Power) {
1821  SDValue Ops[] = { V.getOperand(0), V.getOperand(1) };
1822  if (V.getOpcode() == ISD::MUL) {
1823  for (int i=0; i < 2; ++i) {
1824  if (isa<ConstantSDNode>(Ops[i].getNode()) &&
1825  V.getConstantOperandVal(i) % ((uint64_t)1 << Power) == 0) {
1826  uint64_t NewConst = V.getConstantOperandVal(i) >> Power;
1827  if (NewConst == 1)
1828  return Ops[!i];
1829  Ops[i] = CurDAG->getConstant(NewConst,
1830  SDLoc(V), V.getValueType());
1831  break;
1832  }
1833  }
1834  } else if (V.getOpcode() == ISD::SHL) {
1835  uint64_t ShiftAmount = V.getConstantOperandVal(1);
1836  if (ShiftAmount == Power)
1837  return Ops[0];
1838  Ops[1] = CurDAG->getConstant(ShiftAmount - Power,
1839  SDLoc(V), V.getValueType());
1840  }
1841 
1842  return CurDAG->getNode(V.getOpcode(), SDLoc(V), V.getValueType(), Ops);
1843 }
1844 
1845 static bool isTargetConstant(const SDValue &V) {
1846  return V.getOpcode() == HexagonISD::CONST32 ||
1848 }
1849 
1850 unsigned HexagonDAGToDAGISel::getUsesInFunction(const Value *V) {
1851  if (GAUsesInFunction.count(V))
1852  return GAUsesInFunction[V];
1853 
1854  unsigned Result = 0;
1855  const Function &CurF = CurDAG->getMachineFunction().getFunction();
1856  for (const User *U : V->users()) {
1857  if (isa<Instruction>(U) &&
1858  cast<Instruction>(U)->getParent()->getParent() == &CurF)
1859  ++Result;
1860  }
1861 
1862  GAUsesInFunction[V] = Result;
1863 
1864  return Result;
1865 }
1866 
1867 /// Note - After calling this, N may be dead. It may have been replaced by a
1868 /// new node, so always use the returned value in place of N.
1869 ///
1870 /// @returns The SDValue taking the place of N (which could be N if it is
1871 /// unchanged)
1872 SDValue HexagonDAGToDAGISel::balanceSubTree(SDNode *N, bool TopLevel) {
1873  assert(RootWeights.count(N) && "Cannot balance non-root node.");
1874  assert(RootWeights[N] != -2 && "This node was RAUW'd!");
1875  assert(!TopLevel || N->getOpcode() == ISD::ADD);
1876 
1877  // Return early if this node was already visited
1878  if (RootWeights[N] != -1)
1879  return SDValue(N, 0);
1880 
1881  assert(isOpcodeHandled(N));
1882 
1883  SDValue Op0 = N->getOperand(0);
1884  SDValue Op1 = N->getOperand(1);
1885 
1886  // Return early if the operands will remain unchanged or are all roots
1887  if ((!isOpcodeHandled(Op0.getNode()) || RootWeights.count(Op0.getNode())) &&
1888  (!isOpcodeHandled(Op1.getNode()) || RootWeights.count(Op1.getNode()))) {
1889  SDNode *Op0N = Op0.getNode();
1890  int Weight;
1891  if (isOpcodeHandled(Op0N) && RootWeights[Op0N] == -1) {
1892  Weight = getWeight(balanceSubTree(Op0N).getNode());
1893  // Weight = calculateWeight(Op0N);
1894  } else
1895  Weight = getWeight(Op0N);
1896 
1897  SDNode *Op1N = N->getOperand(1).getNode(); // Op1 may have been RAUWd
1898  if (isOpcodeHandled(Op1N) && RootWeights[Op1N] == -1) {
1899  Weight += getWeight(balanceSubTree(Op1N).getNode());
1900  // Weight += calculateWeight(Op1N);
1901  } else
1902  Weight += getWeight(Op1N);
1903 
1904  RootWeights[N] = Weight;
1905  RootHeights[N] = std::max(getHeight(N->getOperand(0).getNode()),
1906  getHeight(N->getOperand(1).getNode())) + 1;
1907 
1908  LLVM_DEBUG(dbgs() << "--> No need to balance root (Weight=" << Weight
1909  << " Height=" << RootHeights[N] << "): ");
1910  LLVM_DEBUG(N->dump(CurDAG));
1911 
1912  return SDValue(N, 0);
1913  }
1914 
1915  LLVM_DEBUG(dbgs() << "** Balancing root node: ");
1916  LLVM_DEBUG(N->dump(CurDAG));
1917 
1918  unsigned NOpcode = N->getOpcode();
1919 
1920  LeafPrioQueue Leaves(NOpcode);
1921  SmallVector<SDValue, 4> Worklist;
1922  Worklist.push_back(SDValue(N, 0));
1923 
1924  // SHL nodes will be converted to MUL nodes
1925  if (NOpcode == ISD::SHL)
1926  NOpcode = ISD::MUL;
1927 
1928  bool CanFactorize = false;
1929  WeightedLeaf Mul1, Mul2;
1930  unsigned MaxPowerOf2 = 0;
1931  WeightedLeaf GA;
1932 
1933  // Do not try to factor out a shift if there is already a shift at the tip of
1934  // the tree.
1935  bool HaveTopLevelShift = false;
1936  if (TopLevel &&
1937  ((isOpcodeHandled(Op0.getNode()) && Op0.getOpcode() == ISD::SHL &&
1938  Op0.getConstantOperandVal(1) < 4) ||
1939  (isOpcodeHandled(Op1.getNode()) && Op1.getOpcode() == ISD::SHL &&
1940  Op1.getConstantOperandVal(1) < 4)))
1941  HaveTopLevelShift = true;
1942 
1943  // Flatten the subtree into an ordered list of leaves; at the same time
1944  // determine whether the tree is already balanced.
1945  int InsertionOrder = 0;
1946  SmallDenseMap<SDValue, int> NodeHeights;
1947  bool Imbalanced = false;
1948  int CurrentWeight = 0;
1949  while (!Worklist.empty()) {
1950  SDValue Child = Worklist.pop_back_val();
1951 
1952  if (Child.getNode() != N && RootWeights.count(Child.getNode())) {
1953  // CASE 1: Child is a root note
1954 
1955  int Weight = RootWeights[Child.getNode()];
1956  if (Weight == -1) {
1957  Child = balanceSubTree(Child.getNode());
1958  // calculateWeight(Child.getNode());
1959  Weight = getWeight(Child.getNode());
1960  } else if (Weight == -2) {
1961  // Whoops, this node was RAUWd by one of the balanceSubTree calls we
1962  // made. Our worklist isn't up to date anymore.
1963  // Restart the whole process.
1964  LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
1965  return balanceSubTree(N, TopLevel);
1966  }
1967 
1968  NodeHeights[Child] = 1;
1969  CurrentWeight += Weight;
1970 
1971  unsigned PowerOf2;
1972  if (TopLevel && !CanFactorize && !HaveTopLevelShift &&
1973  (Child.getOpcode() == ISD::MUL || Child.getOpcode() == ISD::SHL) &&
1974  Child.hasOneUse() && (PowerOf2 = getPowerOf2Factor(Child))) {
1975  // Try to identify two factorizable MUL/SHL children greedily. Leave
1976  // them out of the priority queue for now so we can deal with them
1977  // after.
1978  if (!Mul1.Value.getNode()) {
1979  Mul1 = WeightedLeaf(Child, Weight, InsertionOrder++);
1980  MaxPowerOf2 = PowerOf2;
1981  } else {
1982  Mul2 = WeightedLeaf(Child, Weight, InsertionOrder++);
1983  MaxPowerOf2 = std::min(MaxPowerOf2, PowerOf2);
1984 
1985  // Our addressing modes can only shift by a maximum of 3
1986  if (MaxPowerOf2 > 3)
1987  MaxPowerOf2 = 3;
1988 
1989  CanFactorize = true;
1990  }
1991  } else
1992  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
1993  } else if (!isOpcodeHandled(Child.getNode())) {
1994  // CASE 2: Child is an unhandled kind of node (e.g. constant)
1995  int Weight = getWeight(Child.getNode());
1996 
1997  NodeHeights[Child] = getHeight(Child.getNode());
1998  CurrentWeight += Weight;
1999 
2000  if (isTargetConstant(Child) && !GA.Value.getNode())
2001  GA = WeightedLeaf(Child, Weight, InsertionOrder++);
2002  else
2003  Leaves.push(WeightedLeaf(Child, Weight, InsertionOrder++));
2004  } else {
2005  // CASE 3: Child is a subtree of same opcode
2006  // Visit children first, then flatten.
2007  unsigned ChildOpcode = Child.getOpcode();
2008  assert(ChildOpcode == NOpcode ||
2009  (NOpcode == ISD::MUL && ChildOpcode == ISD::SHL));
2010 
2011  // Convert SHL to MUL
2012  SDValue Op1;
2013  if (ChildOpcode == ISD::SHL)
2014  Op1 = getMultiplierForSHL(Child.getNode());
2015  else
2016  Op1 = Child->getOperand(1);
2017 
2018  if (!NodeHeights.count(Op1) || !NodeHeights.count(Child->getOperand(0))) {
2019  assert(!NodeHeights.count(Child) && "Parent visited before children?");
2020  // Visit children first, then re-visit this node
2021  Worklist.push_back(Child);
2022  Worklist.push_back(Op1);
2023  Worklist.push_back(Child->getOperand(0));
2024  } else {
2025  // Back at this node after visiting the children
2026  if (std::abs(NodeHeights[Op1] - NodeHeights[Child->getOperand(0)]) > 1)
2027  Imbalanced = true;
2028 
2029  NodeHeights[Child] = std::max(NodeHeights[Op1],
2030  NodeHeights[Child->getOperand(0)]) + 1;
2031  }
2032  }
2033  }
2034 
2035  LLVM_DEBUG(dbgs() << "--> Current height=" << NodeHeights[SDValue(N, 0)]
2036  << " weight=" << CurrentWeight
2037  << " imbalanced=" << Imbalanced << "\n");
2038 
2039  // Transform MUL(x, C * 2^Y) + SHL(z, Y) -> SHL(ADD(MUL(x, C), z), Y)
2040  // This factors out a shift in order to match memw(a<<Y+b).
2041  if (CanFactorize && (willShiftRightEliminate(Mul1.Value, MaxPowerOf2) ||
2042  willShiftRightEliminate(Mul2.Value, MaxPowerOf2))) {
2043  LLVM_DEBUG(dbgs() << "--> Found common factor for two MUL children!\n");
2044  int Weight = Mul1.Weight + Mul2.Weight;
2045  int Height = std::max(NodeHeights[Mul1.Value], NodeHeights[Mul2.Value]) + 1;
2046  SDValue Mul1Factored = factorOutPowerOf2(Mul1.Value, MaxPowerOf2);
2047  SDValue Mul2Factored = factorOutPowerOf2(Mul2.Value, MaxPowerOf2);
2048  SDValue Sum = CurDAG->getNode(ISD::ADD, SDLoc(N), Mul1.Value.getValueType(),
2049  Mul1Factored, Mul2Factored);
2050  SDValue Const = CurDAG->getConstant(MaxPowerOf2, SDLoc(N),
2051  Mul1.Value.getValueType());
2052  SDValue New = CurDAG->getNode(ISD::SHL, SDLoc(N), Mul1.Value.getValueType(),
2053  Sum, Const);
2054  NodeHeights[New] = Height;
2055  Leaves.push(WeightedLeaf(New, Weight, Mul1.InsertionOrder));
2056  } else if (Mul1.Value.getNode()) {
2057  // We failed to factorize two MULs, so now the Muls are left outside the
2058  // queue... add them back.
2059  Leaves.push(Mul1);
2060  if (Mul2.Value.getNode())
2061  Leaves.push(Mul2);
2062  CanFactorize = false;
2063  }
2064 
2065  // Combine GA + Constant -> GA+Offset, but only if GA is not used elsewhere
2066  // and the root node itself is not used more than twice. This reduces the
2067  // amount of additional constant extenders introduced by this optimization.
2068  bool CombinedGA = false;
2069  if (NOpcode == ISD::ADD && GA.Value.getNode() && Leaves.hasConst() &&
2070  GA.Value.hasOneUse() && N->use_size() < 3) {
2071  GlobalAddressSDNode *GANode =
2072  cast<GlobalAddressSDNode>(GA.Value.getOperand(0));
2073  ConstantSDNode *Offset = cast<ConstantSDNode>(Leaves.top().Value);
2074 
2075  if (getUsesInFunction(GANode->getGlobal()) == 1 && Offset->hasOneUse() &&
2077  LLVM_DEBUG(dbgs() << "--> Combining GA and offset ("
2078  << Offset->getSExtValue() << "): ");
2079  LLVM_DEBUG(GANode->dump(CurDAG));
2080 
2081  SDValue NewTGA =
2082  CurDAG->getTargetGlobalAddress(GANode->getGlobal(), SDLoc(GA.Value),
2083  GANode->getValueType(0),
2084  GANode->getOffset() + (uint64_t)Offset->getSExtValue());
2085  GA.Value = CurDAG->getNode(GA.Value.getOpcode(), SDLoc(GA.Value),
2086  GA.Value.getValueType(), NewTGA);
2087  GA.Weight += Leaves.top().Weight;
2088 
2089  NodeHeights[GA.Value] = getHeight(GA.Value.getNode());
2090  CombinedGA = true;
2091 
2092  Leaves.pop(); // Remove the offset constant from the queue
2093  }
2094  }
2095 
2096  if ((RebalanceOnlyForOptimizations && !CanFactorize && !CombinedGA) ||
2097  (RebalanceOnlyImbalancedTrees && !Imbalanced)) {
2098  RootWeights[N] = CurrentWeight;
2099  RootHeights[N] = NodeHeights[SDValue(N, 0)];
2100 
2101  return SDValue(N, 0);
2102  }
2103 
2104  // Combine GA + SHL(x, C<=31) so we will match Rx=add(#u8,asl(Rx,#U5))
2105  if (NOpcode == ISD::ADD && GA.Value.getNode()) {
2106  WeightedLeaf SHL = Leaves.findSHL(31);
2107  if (SHL.Value.getNode()) {
2108  int Height = std::max(NodeHeights[GA.Value], NodeHeights[SHL.Value]) + 1;
2109  GA.Value = CurDAG->getNode(ISD::ADD, SDLoc(GA.Value),
2110  GA.Value.getValueType(),
2111  GA.Value, SHL.Value);
2112  GA.Weight = SHL.Weight; // Specifically ignore the GA weight here
2113  NodeHeights[GA.Value] = Height;
2114  }
2115  }
2116 
2117  if (GA.Value.getNode())
2118  Leaves.push(GA);
2119 
2120  // If this is the top level and we haven't factored out a shift, we should try
2121  // to move a constant to the bottom to match addressing modes like memw(rX+C)
2122  if (TopLevel && !CanFactorize && Leaves.hasConst()) {
2123  LLVM_DEBUG(dbgs() << "--> Pushing constant to tip of tree.");
2124  Leaves.pushToBottom(Leaves.pop());
2125  }
2126 
2127  const DataLayout &DL = CurDAG->getDataLayout();
2128  const TargetLowering &TLI = *getTargetLowering();
2129 
2130  // Rebuild the tree using Huffman's algorithm
2131  while (Leaves.size() > 1) {
2132  WeightedLeaf L0 = Leaves.pop();
2133 
2134  // See whether we can grab a MUL to form an add(Rx,mpyi(Ry,#u6)),
2135  // otherwise just get the next leaf
2136  WeightedLeaf L1 = Leaves.findMULbyConst();
2137  if (!L1.Value.getNode())
2138  L1 = Leaves.pop();
2139 
2140  assert(L0.Weight <= L1.Weight && "Priority queue is broken!");
2141 
2142  SDValue V0 = L0.Value;
2143  int V0Weight = L0.Weight;
2144  SDValue V1 = L1.Value;
2145  int V1Weight = L1.Weight;
2146 
2147  // Make sure that none of these nodes have been RAUW'd
2148  if ((RootWeights.count(V0.getNode()) && RootWeights[V0.getNode()] == -2) ||
2149  (RootWeights.count(V1.getNode()) && RootWeights[V1.getNode()] == -2)) {
2150  LLVM_DEBUG(dbgs() << "--> Subtree was RAUWd. Restarting...\n");
2151  return balanceSubTree(N, TopLevel);
2152  }
2153 
2156  EVT VT = N->getValueType(0);
2157  SDValue NewNode;
2158 
2159  if (V0C && !V1C) {
2160  std::swap(V0, V1);
2161  std::swap(V0C, V1C);
2162  }
2163 
2164  // Calculate height of this node
2165  assert(NodeHeights.count(V0) && NodeHeights.count(V1) &&
2166  "Children must have been visited before re-combining them!");
2167  int Height = std::max(NodeHeights[V0], NodeHeights[V1]) + 1;
2168 
2169  // Rebuild this node (and restore SHL from MUL if needed)
2170  if (V1C && NOpcode == ISD::MUL && V1C->getAPIntValue().isPowerOf2())
2171  NewNode = CurDAG->getNode(
2172  ISD::SHL, SDLoc(V0), VT, V0,
2174  V1C->getAPIntValue().logBase2(), SDLoc(N),
2175  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2176  else
2177  NewNode = CurDAG->getNode(NOpcode, SDLoc(N), VT, V0, V1);
2178 
2179  NodeHeights[NewNode] = Height;
2180 
2181  int Weight = V0Weight + V1Weight;
2182  Leaves.push(WeightedLeaf(NewNode, Weight, L0.InsertionOrder));
2183 
2184  LLVM_DEBUG(dbgs() << "--> Built new node (Weight=" << Weight
2185  << ",Height=" << Height << "):\n");
2186  LLVM_DEBUG(NewNode.dump());
2187  }
2188 
2189  assert(Leaves.size() == 1);
2190  SDValue NewRoot = Leaves.top().Value;
2191 
2192  assert(NodeHeights.count(NewRoot));
2193  int Height = NodeHeights[NewRoot];
2194 
2195  // Restore SHL if we earlier converted it to a MUL
2196  if (NewRoot.getOpcode() == ISD::MUL) {
2197  ConstantSDNode *V1C = dyn_cast<ConstantSDNode>(NewRoot.getOperand(1));
2198  if (V1C && V1C->getAPIntValue().isPowerOf2()) {
2199  EVT VT = NewRoot.getValueType();
2200  SDValue V0 = NewRoot.getOperand(0);
2201  NewRoot = CurDAG->getNode(
2202  ISD::SHL, SDLoc(NewRoot), VT, V0,
2204  V1C->getAPIntValue().logBase2(), SDLoc(NewRoot),
2205  TLI.getScalarShiftAmountTy(DL, V0.getValueType())));
2206  }
2207  }
2208 
2209  if (N != NewRoot.getNode()) {
2210  LLVM_DEBUG(dbgs() << "--> Root is now: ");
2211  LLVM_DEBUG(NewRoot.dump());
2212 
2213  // Replace all uses of old root by new root
2214  CurDAG->ReplaceAllUsesWith(N, NewRoot.getNode());
2215  // Mark that we have RAUW'd N
2216  RootWeights[N] = -2;
2217  } else {
2218  LLVM_DEBUG(dbgs() << "--> Root unchanged.\n");
2219  }
2220 
2221  RootWeights[NewRoot.getNode()] = Leaves.top().Weight;
2222  RootHeights[NewRoot.getNode()] = Height;
2223 
2224  return NewRoot;
2225 }
2226 
2227 void HexagonDAGToDAGISel::rebalanceAddressTrees() {
2228  for (auto I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E;) {
2229  SDNode *N = &*I++;
2230  if (N->getOpcode() != ISD::LOAD && N->getOpcode() != ISD::STORE)
2231  continue;
2232 
2233  SDValue BasePtr = cast<MemSDNode>(N)->getBasePtr();
2234  if (BasePtr.getOpcode() != ISD::ADD)
2235  continue;
2236 
2237  // We've already processed this node
2238  if (RootWeights.count(BasePtr.getNode()))
2239  continue;
2240 
2241  LLVM_DEBUG(dbgs() << "** Rebalancing address calculation in node: ");
2242  LLVM_DEBUG(N->dump(CurDAG));
2243 
2244  // FindRoots
2245  SmallVector<SDNode *, 4> Worklist;
2246 
2247  Worklist.push_back(BasePtr.getOperand(0).getNode());
2248  Worklist.push_back(BasePtr.getOperand(1).getNode());
2249 
2250  while (!Worklist.empty()) {
2251  SDNode *N = Worklist.pop_back_val();
2252  unsigned Opcode = N->getOpcode();
2253 
2254  if (!isOpcodeHandled(N))
2255  continue;
2256 
2257  Worklist.push_back(N->getOperand(0).getNode());
2258  Worklist.push_back(N->getOperand(1).getNode());
2259 
2260  // Not a root if it has only one use and same opcode as its parent
2261  if (N->hasOneUse() && Opcode == N->use_begin()->getOpcode())
2262  continue;
2263 
2264  // This root node has already been processed
2265  if (RootWeights.count(N))
2266  continue;
2267 
2268  RootWeights[N] = -1;
2269  }
2270 
2271  // Balance node itself
2272  RootWeights[BasePtr.getNode()] = -1;
2273  SDValue NewBasePtr = balanceSubTree(BasePtr.getNode(), /*TopLevel=*/ true);
2274 
2275  if (N->getOpcode() == ISD::LOAD)
2276  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
2277  NewBasePtr, N->getOperand(2));
2278  else
2279  N = CurDAG->UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1),
2280  NewBasePtr, N->getOperand(3));
2281 
2282  LLVM_DEBUG(dbgs() << "--> Final node: ");
2283  LLVM_DEBUG(N->dump(CurDAG));
2284  }
2285 
2287  GAUsesInFunction.clear();
2288  RootHeights.clear();
2289  RootWeights.clear();
2290 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDNode * MorphNodeTo(SDNode *N, unsigned Opc, SDVTList VTs, ArrayRef< SDValue > Ops)
This mutates the specified node to have the specified return type, opcode, and operands.
uint64_t CallInst * C
static MVT getIntegerVT(unsigned BitWidth)
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:110
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOffset() const
const GlobalValue * getGlobal() const
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
static cl::opt< bool > CheckSingleUse("hexagon-isel-su", cl::Hidden, cl::init(true), cl::desc("Enable checking of SDNode's single-use status"))
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static bool isMemOPCandidate(SDNode *I, SDNode *U)
static bool willShiftRightEliminate(SDValue V, unsigned Amount)
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:366
void ReplaceUses(SDValue F, SDValue T)
ReplaceUses - replace all uses of the old node F with the use of the new node T.
const SDValue & getBasePtr() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
bool DetectUseSxtw(SDValue &N, SDValue &R)
const SDValue & getValue() const
SDVTList getVTList() const
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:252
bool SelectAddrFI(SDValue &N, SDValue &R)
const SDValue & getChain() const
static unsigned getPowerOf2Factor(SDValue Val)
ISD::MemIndexedMode getAddressingMode() const
Return the addressing mode for this load or store: unindexed, pre-inc, pre-dec, post-inc, or post-dec.
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:130
unsigned getAlignment() const
Hexagon target-specific information for each MachineFunction.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:305
A debug info location.
Definition: DebugLoc.h:33
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:140
F(f)
uint64_t alignTo(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the next integer (mod 2**64) that is greater than or equal to Value and is a multiple of Alig...
Definition: MathExtras.h:684
const HexagonFrameLowering * getFrameLowering() const override
void setNodeId(int Id)
Set unique node id.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
std::size_t countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
MachineFunction * MF
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1508
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:158
bool isTruncatingStore() const
Return true if the op does a truncation before store.
#define DEBUG_WITH_TYPE(TYPE, X)
DEBUG_WITH_TYPE macro - This macro should be used by passes to emit debug information.
Definition: Debug.h:64
unsigned countTrailingZeros() const
Count the number of trailing zero bits.
Definition: APInt.h:1631
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
bool hasOneUse() const
Return true if there is exactly one use of this node.
A description of a memory reference used in the backend.
static ManagedStatic< DebugCounter > DC
static cl::opt< bool > RebalanceOnlyForOptimizations("rebalance-only-opt", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if this allows optimizations"))
Shift and rotation operations.
Definition: ISDOpcodes.h:409
std::size_t countTrailingOnes(T Value, ZeroBehavior ZB=ZB_Width)
Count the number of ones from the least significant bit to the first zero bit.
Definition: MathExtras.h:477
SDValue getTargetExtractSubreg(int SRIdx, const SDLoc &DL, EVT VT, SDValue Operand)
A convenience function for creating TargetInstrInfo::EXTRACT_SUBREG nodes.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
const TargetLowering * TLI
static bool isTargetConstant(const SDValue &V)
uint64_t getConstantOperandVal(unsigned i) const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
SimpleValueType SimpleTy
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:303
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted...
void SelectIndexedLoad(LoadSDNode *LD, const SDLoc &dl)
bool hasVarSizedObjects() const
This method may be called any time after instruction selection is complete to determine if the stack ...
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:459
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:400
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
This class is used to represent EVT&#39;s, which are used to parameterize some operations.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
iterator_range< allnodes_iterator > allnodes()
Definition: SelectionDAG.h:448
unsigned getSizeInBits() const
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:291
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:477
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:397
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:627
#define T
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
static Error getOffset(const SymbolRef &Sym, SectionRef Sec, uint64_t &Result)
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:575
bool SelectNewCircIntrinsic(SDNode *IntN)
Generate a machine instruction node for the new circlar buffer intrinsics.
static cl::opt< bool > EnableAddressRebalancing("isel-rebalance-addr", cl::Hidden, cl::init(true), cl::desc("Rebalance address calculation trees to improve " "instruction selection"))
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:150
bool SelectBrevLdIntrinsic(SDNode *IntN)
SDNode * StoreInstrForLoadIntrinsic(MachineSDNode *LoadN, SDNode *IntN)
This class is used to represent ISD::STORE nodes.
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
const SDValue & getBasePtr() const
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:422
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
std::size_t countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:119
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:428
Machine Value Type.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
bool SelectAnyImm0(SDValue &N, SDValue &R)
bool SelectAnyInt(SDValue &N, SDValue &R)
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator begin()
Definition: SmallVector.h:128
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:933
bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, std::vector< SDValue > &OutOps) override
SelectInlineAsmMemoryOperand - Implement addressing mode selection for inline asm expressions...
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
const SDValue & getOffset() const
void RemoveDeadNodes()
This method deletes all unreachable nodes in the SelectionDAG.
unsigned getMaxAlignment() const
Return the alignment in bytes that this function must be aligned to, which is greater than the defaul...
bool isValidAutoIncImm(const EVT VT, const int Offset) const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using &#39;From&#39; to use &#39;To&#39; instead.
const APInt & getAPIntValue() const
bool SelectAnyImm2(SDValue &N, SDValue &R)
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:56
void RemoveDeadNode(SDNode *N)
Remove the specified node from the system.
Extended Value Type.
Definition: ValueTypes.h:33
void SelectIndexedStore(StoreSDNode *ST, const SDLoc &dl)
const MachineBasicBlock & front() const
size_t size() const
Definition: SmallVector.h:52
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr bool empty(const T &RangeOrContainer)
Test whether RangeOrContainer is empty. Similar to C++17 std::empty.
Definition: STLExtras.h:209
static bool isOpcodeHandled(const SDNode *N)
void dump() const
Dump this node, for debugging.
bool isHVXVectorType(MVT VecTy, bool IncludeBool=false) const
bool SelectAddrGA(SDValue &N, SDValue &R)
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
BlockVerifier::State From
ilist< SDNode >::size_type allnodes_size() const
Definition: SelectionDAG.h:444
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:221
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:845
bool SelectGlobalAddress(SDValue &N, SDValue &R, bool UseGP, uint32_t LogAlign)
unsigned estimateStackSize(const MachineFunction &MF) const
Estimate and return the size of the stack frame.
void dump() const
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
Definition: STLExtras.h:1043
virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const
EVT is not used in-tree, but is used by out-of-tree target.
An SDNode that represents everything that will be needed to construct a MachineInstr.
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:379
CHAIN = SC CHAIN, Imm128 - System call.
This is an abstract virtual class for memory operations.
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Represents one node in the SelectionDAG.
allnodes_const_iterator allnodes_begin() const
Definition: SelectionDAG.h:436
const Function & getFunction() const
Return the LLVM function that this machine code represents.
bool SelectAnyImmediate(SDValue &N, SDValue &R, uint32_t LogAlign)
unsigned logBase2() const
Definition: APInt.h:1747
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
size_t use_size() const
Return the number of uses of this node.
EVT getMemoryVT() const
Return the type of the in-memory value.
Class for arbitrary precision integers.
Definition: APInt.h:69
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:419
bool isPowerOf2() const
Check if this APInt&#39;s value is a power of two greater than zero.
Definition: APInt.h:463
iterator_range< user_iterator > users()
Definition: Value.h:399
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:467
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:470
static cl::opt< bool > RebalanceOnlyImbalancedTrees("rebalance-only-imbal", cl::Hidden, cl::init(false), cl::desc("Rebalance address tree only if it is imbalanced"))
bool SelectAnyImm3(SDValue &N, SDValue &R)
bool SelectAnyImm1(SDValue &N, SDValue &R)
allnodes_const_iterator allnodes_end() const
Definition: SelectionDAG.h:437
LLVM_ATTRIBUTE_ALWAYS_INLINE iterator end()
Definition: SmallVector.h:132
const SDValue & getValue() const
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:386
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:704
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:55
bool SelectAddrGP(SDValue &N, SDValue &R)
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:485
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:613
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1212
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:322
uint32_t Size
Definition: Profile.cpp:46
bool tryLoadOfLoadIntrinsic(LoadSDNode *N)
FunctionPass * createHexagonISelDag(HexagonTargetMachine &TM, CodeGenOpt::Level OptLevel)
unsigned getOpcode() const
unsigned CreateReg(MVT VT)
CreateReg - Allocate a single virtual register for the given type.
size_type count(const_arg_type_t< KeyT > Val) const
Return 1 if the specified key is in the map, 0 otherwise.
Definition: DenseMap.h:171
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
const TargetLowering * getTargetLowering() const
bool SelectAnyImm(SDValue &N, SDValue &R)
This class is used to form a handle around another node that is persistent and is updated across invo...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void PreprocessISelDAG() override
PreprocessISelDAG - This hook allows targets to hack on the graph before instruction selection starts...
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:565
LLVM Value Representation.
Definition: Value.h:72
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:80
static const Function * getParent(const Value *V)
bool isNonTemporal() const
const APFloat & getValueAPF() const
void ReplaceNode(SDNode *F, SDNode *T)
Replace all uses of F with T, then remove F from the DAG.
virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const
Return true if folding a constant offset with the given GlobalAddress is legal.
APInt bitcastToAPInt() const
Definition: APFloat.h:1093
Conversion operators.
Definition: ISDOpcodes.h:464
const SDValue & getOperand(unsigned i) const
uint64_t getZExtValue() const
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:125
MachineSDNode * LoadInstrForLoadIntrinsic(SDNode *IntN)
void Select(SDNode *N) override
Main hook for targets to transform nodes into machine nodes.
#define LLVM_DEBUG(X)
Definition: Debug.h:122
void setNodeMemRefs(MachineSDNode *N, ArrayRef< MachineMemOperand *> NewMemRefs)
Mutate the specified machine node&#39;s memory references to the provided list.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
FunctionLoweringInfo * FuncInfo
#define T1
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:621
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:913
This class is used to represent ISD::LOAD nodes.