HexagonVectorCombine.cpp
//===-- HexagonVectorCombine.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// HexagonVectorCombine is a utility class implementing a variety of functions
// that assist in vector-based optimizations.
//
// AlignVectors: replace unaligned vector loads and stores with aligned ones.
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"

#include <algorithm>
#include <deque>
#include <map>
#include <numeric> // for std::iota, used below
#include <set>
#include <utility>
#include <vector>

#define DEBUG_TYPE "hexagon-vc"

using namespace llvm;

namespace {
class HexagonVectorCombine {
public:
  HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
                       DominatorTree &DT_, TargetLibraryInfo &TLI_,
                       const TargetMachine &TM_)
      : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
        TLI(TLI_),
        HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}

  bool run();

  // Common integer type.
  IntegerType *getIntTy() const;
  // Byte type: either scalar (when ElemCount = 0), or vector with given
  // element count.
  Type *getByteTy(int ElemCount = 0) const;
  // Boolean type: either scalar (when ElemCount = 0), or vector with given
  // element count.
  Type *getBoolTy(int ElemCount = 0) const;
  // Create a ConstantInt of type returned by getIntTy with the value Val.
  ConstantInt *getConstInt(int Val) const;
  // Get the integer value of Val, if it exists.
  Optional<APInt> getIntValue(const Value *Val) const;
  // Is Val a constant 0, or a vector of 0s?
  bool isZero(const Value *Val) const;
  // Is Val an undef value?
  bool isUndef(const Value *Val) const;

  int getSizeOf(const Value *Val) const;
  int getSizeOf(const Type *Ty) const;
  int getTypeAlignment(Type *Ty) const;

  VectorType *getByteVectorTy(int ScLen) const;
  Constant *getNullValue(Type *Ty) const;
  Constant *getFullValue(Type *Ty) const;

  Value *insertb(IRBuilder<> &Builder, Value *Dest, Value *Src, int Start,
                 int Length, int Where) const;
  Value *vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
  Value *vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
  Value *concat(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) const;
  Value *vresize(IRBuilder<> &Builder, Value *Val, int NewSize,
                 Value *Pad) const;
  Value *rescale(IRBuilder<> &Builder, Value *Mask, Type *FromTy,
                 Type *ToTy) const;
  Value *vlsb(IRBuilder<> &Builder, Value *Val) const;
  Value *vbytes(IRBuilder<> &Builder, Value *Val) const;

  Value *createHvxIntrinsic(IRBuilder<> &Builder, Intrinsic::ID IntID,
                            Type *RetTy, ArrayRef<Value *> Args) const;

  Optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;

  template <typename T = std::vector<Instruction *>>
  bool isSafeToMoveBeforeInBB(const Instruction &In,
                              BasicBlock::const_iterator To,
                              const T &Ignore = {}) const;

  Function &F;
  const DataLayout &DL;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  TargetLibraryInfo &TLI;
  const HexagonSubtarget &HST;

private:
#ifndef NDEBUG
  // These two functions are only used for assertions at the moment.
  bool isByteVecTy(Type *Ty) const;
  bool isSectorTy(Type *Ty) const;
#endif
  Value *getElementRange(IRBuilder<> &Builder, Value *Lo, Value *Hi, int Start,
                         int Length) const;
};

class AlignVectors {
public:
  AlignVectors(HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  bool run();

private:
  using InstList = std::vector<Instruction *>;

  struct Segment {
    void *Data;
    int Start;
    int Size;
  };

  struct AddrInfo {
    AddrInfo(const AddrInfo &) = default;
    AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
             Align H)
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}

    // XXX: add Size member?
    Instruction *Inst;
    Value *Addr;
    Type *ValTy;
    Align HaveAlign;
    Align NeedAlign;
    int Offset = 0; // Offset (in bytes) from the first member of the
                    // containing AddrList.
  };
  using AddrList = std::vector<AddrInfo>;

  struct InstrLess {
    bool operator()(const Instruction *A, const Instruction *B) const {
      return A->comesBefore(B);
    }
  };
  using DepList = std::set<Instruction *, InstrLess>;

  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
    Instruction *Base; // Base instruction of the parent address group.
    InstList Main;     // Main group of instructions.
    InstList Deps;     // List of dependencies.
    bool IsHvx;        // Is this a group of HVX instructions?
    bool IsLoad;       // Is this a load group?
  };
  using MoveList = std::vector<MoveGroup>;

  struct ByteSpan {
    struct Segment {
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Value *Val;
      int Start;
      int Size;
    };

    struct Block {
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      Block(const Block &Blk) = default;
      Segment Seg;
      int Pos;
    };

    int extent() const;
    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }

    std::vector<Block> Blocks;

    using iterator = decltype(Blocks)::iterator;
    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
    using const_iterator = decltype(Blocks)::const_iterator;
    const_iterator begin() const { return Blocks.begin(); }
    const_iterator end() const { return Blocks.end(); }
  };

  Align getAlignFromValue(const Value *V) const;
  Optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;

  Value *getPayload(Value *Val) const;
  Value *getMask(Value *Val) const;
  Value *getPassThrough(Value *Val) const;

  Value *createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
                               int Adjust) const;
  Value *createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
                              int Alignment) const;
  Value *createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, Value *Ptr,
                           int Alignment, Value *Mask, Value *PassThru) const;
  Value *createAlignedStore(IRBuilder<> &Builder, Value *Val, Value *Ptr,
                            int Alignment, Value *Mask) const;

  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool move(const MoveGroup &Move) const;
  bool realignGroup(const MoveGroup &Move) const;

  friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
  friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
  friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);

  std::map<Instruction *, AddrList> AddrGroups;
  HexagonVectorCombine &HVC;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << " " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;
  return OS;
}

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
  OS << "Main\n";
  for (Instruction *I : MG.Main)
    OS << "  " << *I << '\n';
  OS << "Deps\n";
  for (Instruction *I : MG.Deps)
    OS << "  " << *I << '\n';
  return OS;
}

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS) {
    OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
       << *B.Seg.Val << '\n';
  }
  OS << ']';
  return OS;
}

} // namespace

namespace {

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}
template <typename T> T *isCandidate(Instruction *In) {
  return dyn_cast<T>(In);
}
template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<LoadInst>(In));
}
template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<StoreInst>(In));
}

#if !defined(_MSC_VER) || _MSC_VER >= 1924
// VS2017 has trouble compiling this:
// error C2976: 'std::map': too few template arguments
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  for (auto i = map.begin(), e = map.end(); i != e;) {
    if (p(*i))
      i = map.erase(i);
    else
      i = std::next(i);
  }
}

// Forward other erase_ifs to the LLVM implementations.
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
  llvm::erase_if(std::forward<T>(container), p);
}

} // namespace

// --- Begin AlignVectors

auto AlignVectors::ByteSpan::extent() const -> int {
  if (size() == 0)
    return 0;
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
  }
  return Max - Min;
}

auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  ByteSpan Section;
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);                       // Left end.
    int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1.
    if (L < R) {
      // How much to chop off the beginning of the segment:
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
    }
  }
  return Section;
}

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  for (Block &B : Blocks)
    B.Pos += Offset;
  return *this;
}

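// A worked example of the ByteSpan operations above (illustrative only, not
// part of the original source): given Blocks = {Val=A, Start=0, Size=8 at
// Pos=0} and {Val=B, Start=0, Size=8 at Pos=8}, extent() returns 16.
// section(4, 8) keeps only the overlapping halves: {A, Start=4, Size=4,
// Pos=4} and {B, Start=0, Size=4, Pos=8}. A subsequent shift(-4) rebases the
// positions to 0 and 4 while leaving the segment offsets untouched.
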
auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
  const auto *C = dyn_cast<ConstantInt>(V);
  assert(C && "Alignment must be a compile-time constant integer");
  return C->getAlignValue();
}

auto AlignVectors::getAddrInfo(Instruction &In) const -> Optional<AddrInfo> {
  if (auto *L = isCandidate<LoadInst>(&In))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
                    L->getAlign());
  if (auto *S = isCandidate<StoreInst>(&In))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
    Intrinsic::ID ID = II->getIntrinsicID();
    switch (ID) {
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      getAlignFromValue(II->getArgOperand(1)));
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      getAlignFromValue(II->getArgOperand(2)));
    }
  }
  return Optional<AddrInfo>();
}

auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);
}

auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    Intrinsic::ID ID = 0;
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);
  }
  return Val;
}

auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(2);
    case Intrinsic::masked_store:
      return II->getArgOperand(3);
    }
  }

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy)) {
    int ElemCount = VecTy->getElementCount().getFixedValue();
    return HVC.getFullValue(HVC.getBoolTy(ElemCount));
  }
  return HVC.getFullValue(HVC.getBoolTy());
}

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(3);
  }
  return UndefValue::get(getPayload(Val)->getType());
}

auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust) const
    -> Value * {
  // The adjustment is in bytes, but if it's a multiple of the type size,
  // we don't need to do pointer casts.
  Type *ElemTy = cast<PointerType>(Ptr->getType())->getElementType();
  int ElemSize = HVC.getSizeOf(ElemTy);
  if (Adjust % ElemSize == 0) {
    Value *Tmp0 = Builder.CreateGEP(Ptr, HVC.getConstInt(Adjust / ElemSize));
    return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
  }

  PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext());
  Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
  Value *Tmp1 = Builder.CreateGEP(Tmp0, HVC.getConstInt(Adjust));
  return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo());
}

auto AlignVectors::createAlignedPointer(IRBuilder<> &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment) const
    -> Value * {
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(AsInt, Mask);
  return Builder.CreateIntToPtr(And, ValTy->getPointerTo());
}

auto AlignVectors::createAlignedLoad(IRBuilder<> &Builder, Type *ValTy,
                                     Value *Ptr, int Alignment, Value *Mask,
                                     Value *PassThru) const -> Value * {
  assert(!HVC.isUndef(Mask)); // Should this be allowed?
  if (HVC.isZero(Mask))
    return PassThru;
  if (Mask == ConstantInt::getTrue(Mask->getType()))
    return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
  return Builder.CreateMaskedLoad(Ptr, Align(Alignment), Mask, PassThru);
}

auto AlignVectors::createAlignedStore(IRBuilder<> &Builder, Value *Val,
                                      Value *Ptr, int Alignment,
                                      Value *Mask) const -> Value * {
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
    return UndefValue::get(Val->getType());
  if (Mask == ConstantInt::getTrue(Mask->getType()))
    return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
  return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
}

auto AlignVectors::createAddressGroups() -> bool {
  // An address group created here may contain instructions spanning
  // multiple basic blocks.
  AddrList WorkStack;

  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    }
    return std::make_pair(nullptr, 0);
  };

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    BasicBlock &Block = *DomN->getBlock();
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
      if (!AI)
        continue;
      auto F = findBaseAndOffset(*AI);
      Instruction *GroupInst;
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        GroupInst = BI;
      } else {
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      }
      AddrGroups[GroupInst].push_back(*AI);
    }

    for (DomTreeNode *C : DomN->children())
      Visit(C, Visit);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  };

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  // AddrGroups are formed.

  // Remove groups of size 1.
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
  // Remove groups that don't use HVX types.
  erase_if(AddrGroups, [&](auto &G) {
    return !llvm::any_of(
        G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  });

  return !AddrGroups.empty();
}

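// Illustration of the grouping above (not from the original source): if a
// function loads from Base+n and from Base+n+64, calculatePointerDifference
// folds the index subtraction to the constant 64, so both accesses land in
// the same AddrList, keyed by the first instruction seen, with Offset values
// 0 and 64 respectively.
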
auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  // Form load groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.

  auto getUpwardDeps = [](Instruction *In, Instruction *Base) {
    BasicBlock *Parent = Base->getParent();
    assert(In->getParent() == Parent &&
           "Base and In should be in the same block");
    assert(Base->comesBefore(In) && "Base should come before In");

    DepList Deps;
    std::deque<Instruction *> WorkQ = {In};
    while (!WorkQ.empty()) {
      Instruction *D = WorkQ.front();
      WorkQ.pop_front();
      Deps.insert(D);
      for (Value *Op : D->operands()) {
        if (auto *I = dyn_cast<Instruction>(Op)) {
          if (I->getParent() == Parent && Base->comesBefore(I))
            WorkQ.push_back(I);
        }
      }
    }
    return Deps;
  };

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // Leading instruction in the load group.
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;

    auto isSafeToMoveToBase = [&](const Instruction *I) {
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
    };
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    if (!llvm::all_of(Deps, isSafeToMoveToBase))
      return false;

    // The dependencies will be moved together with the load, so make sure
    // that none of them could be moved independently in another group.
    Deps.erase(Info.Inst);
    auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
    if (llvm::any_of(Deps, inAddrMap))
      return false;
    Move.Main.push_back(Info.Inst);
    llvm::append_range(Move.Deps, Deps);
    return true;
  };

  MoveList LoadGroups;

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
      continue;
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  }

  // Erase singleton groups.
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  return LoadGroups;
}

auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  // Form store groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // For stores with return values we'd have to collect downward
    // dependencies. There are no such stores that we handle at the moment,
    // so omit that.
    assert(Info.Inst->getType()->isVoidTy() &&
           "Not handling stores with return values");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // For stores we need to be careful whether it's safe to move them.
    // Stores that are otherwise safe to move together may not appear safe
    // to move over one another (i.e. isSafeToMoveBefore may return false).
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
      return false;
    Move.Main.push_back(Info.Inst);
    return true;
  };

  MoveList StoreGroups;

  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
      continue;
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  }

  // Erase singleton groups.
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  return StoreGroups;
}

auto AlignVectors::move(const MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
  Instruction *Where = Move.Main.front();

  if (Move.IsLoad) {
    // Move all deps to before Where, keeping order.
    for (Instruction *D : Move.Deps)
      D->moveBefore(Where);
    // Move all main instructions to after Where, keeping order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveAfter(Where);
      Where = M;
    }
  } else {
    // NOTE: Deps are empty for "store" groups. If they need to be
    // non-empty, decide on the order.
    assert(Move.Deps.empty());
    // Move all main instructions to before Where, inverting order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveBefore(Where);
      Where = M;
    }
  }

  return Move.Main.size() + Move.Deps.size() > 1;
}

auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  // TODO: Needs support for masked loads/stores of "scalar" vectors.
  if (!Move.IsHvx)
    return false;

  // Return the element with the maximum alignment from Range,
  // where GetValue obtains the value to compare from an element.
  auto getMaxOf = [](auto Range, auto GetValue) {
    return *std::max_element(
        Range.begin(), Range.end(),
        [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });
  };

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);

  // Conceptually, there is a vector of N bytes covering the addresses
  // starting from the minimum offset (i.e. Base.Addr+Start). This vector
  // represents a contiguous memory region that spans all accessed memory
  // locations.
  // The correspondence between loaded or stored values will be expressed
  // in terms of this vector. For example, the 0th element of the vector
  // from the Base address info will start at byte Start from the beginning
  // of this conceptual vector.
  //
  // This vector will be loaded/stored starting at the nearest down-aligned
  // address, and the amount of the down-alignment will be AlignVal:
  //   valign(load_vector(align_down(Base+Start)), AlignVal)

  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
  AddrList MoveInfos;
  llvm::copy_if(
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  // Maximum alignment present in the whole address group.
  const AddrInfo &WithMaxAlign =
      getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  // Element with the lowest offset in the move address group.
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

  // Set the builder at the top instruction in the move group.
  Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back();
  IRBuilder<> Builder(TopIn);
  Value *AlignAddr = nullptr; // Actual aligned address.
  Value *AlignVal = nullptr;  // Right-shift amount (for valign).

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    // Shift the offset of the maximally aligned instruction (OffAtMax)
    // back by just enough multiples of the required alignment to cover the
    // distance from Start to OffAtMax.
    // Calculate the address adjustment amount based on the address with the
    // maximum alignment. This is to allow a simple gep instruction instead
    // of potential bitcasts to i8*.
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    // Sanity.
    assert(Diff >= 0);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
  } else {
    // WithMinOffset is the lowest address in the group,
    //   WithMinOffset.Addr = Base+Start.
    // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
    // mask off unnecessary bits, so it's ok to just use the original pointer
    // as the alignment amount.
    // Do an explicit down-alignment of the address to avoid creating an
    // aligned instruction with an address that is not really aligned.
    AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
                                     WithMinOffset.ValTy, MinNeeded.value());
    AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
  }

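  // Illustration of the adjustment arithmetic above (hypothetical numbers,
  // not from the original source): with MinNeeded = 64, Start = 4, and
  // OffAtMax = 100, Adjust = -alignTo(96, 64) = -128, so AlignAddr points at
  // offset 100 - 128 = -28 from the group base, and AlignVal becomes
  // Diff = 4 - (100 - 128) = 32, which satisfies 0 <= 32 < 64.
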
  ByteSpan VSpan;
  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
  }

  // The aligned loads/stores will use blocks that are either scalars,
  // or HVX vectors. Let "sector" be the unified term for such a block.
  // blend(scalar, vector) -> sector...
  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;

  if (Move.IsLoad) {
    ByteSpan ASpan;
    auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
    auto *Undef = UndefValue::get(SecTy);

    for (int i = 0; i != NumSectors + 1; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      // FIXME: generate a predicated load?
      Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
      ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen);
    }

    for (int j = 0; j != NumSectors; ++j) {
      ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val,
                                      ASpan[j + 1].Seg.Val, AlignVal);
    }

    for (ByteSpan::Block &B : VSpan) {
      ByteSpan Section = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
      Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
      for (ByteSpan::Block &S : Section) {
        Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
        Accum =
            HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
      }
      // Instead of casting everything to bytes for the vselect, cast to the
      // original value type. This will avoid complications with casting masks.
      // For example, in cases when the original mask applied to i32, it could
      // be converted to a mask applicable to i8 via pred_typecast intrinsic,
      // but if the mask is not exactly of HVX length, extra handling would be
      // needed to make it work.
      Type *ValTy = getPayload(B.Seg.Val)->getType();
      Value *Cast = Builder.CreateBitCast(Accum, ValTy);
      Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
                                        getPassThrough(B.Seg.Val));
      B.Seg.Val->replaceAllUsesWith(Sel);
    }
  } else {
    // Stores.
    ByteSpan ASpanV, ASpanM;

    // Return a vector value corresponding to the input value Val:
    // either <1 x Val> for scalar Val, or Val itself for vector Val.
    auto MakeVec = [](IRBuilder<> &Builder, Value *Val) -> Value * {
      Type *Ty = Val->getType();
      if (Ty->isVectorTy())
        return Val;
      auto *VecTy = VectorType::get(Ty, 1, /*Scalable*/ false);
      return Builder.CreateBitCast(Val, VecTy);
    };

    // Create an extra "undef" sector at the beginning and at the end.
    // They will be used as the left/right filler in the vlalign step.
    for (int i = -1; i != NumSectors + 1; ++i) {
      // For stores, the size of each section is an aligned vector length.
      // Adjust the store offsets relative to the section start offset.
      ByteSpan Section = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
      Value *AccumV = UndefValue::get(SecTy);
      Value *AccumM = HVC.getNullValue(SecTy);
      for (ByteSpan::Block &S : Section) {
        Value *Pay = getPayload(S.Seg.Val);
        Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
                                  Pay->getType(), HVC.getByteTy());
        AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
                             S.Seg.Start, S.Seg.Size, S.Pos);
        AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
                             S.Seg.Start, S.Seg.Size, S.Pos);
      }
      ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
      ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
    }

    // vlalign
    for (int j = 1; j != NumSectors + 2; ++j) {
      ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanV[j - 1].Seg.Val,
                                           ASpanV[j].Seg.Val, AlignVal);
      ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanM[j - 1].Seg.Val,
                                           ASpanM[j].Seg.Val, AlignVal);
    }

    for (int i = 0; i != NumSectors + 1; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      Value *Val = ASpanV[i].Seg.Val;
      Value *Mask = ASpanM[i].Seg.Val; // bytes
      if (!HVC.isUndef(Val) && !HVC.isZero(Mask))
        createAlignedStore(Builder, Val, Ptr, ScLen, HVC.vlsb(Builder, Mask));
    }
  }

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();

  return true;
}

auto AlignVectors::run() -> bool {
  if (!createAddressGroups())
    return false;

  bool Changed = false;
  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {
    llvm::append_range(LoadGroups, createLoadGroups(G.second));
    llvm::append_range(StoreGroups, createStoreGroups(G.second));
  }

  for (auto &M : LoadGroups)
    Changed |= move(M);
  for (auto &M : StoreGroups)
    Changed |= move(M);

  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);

  return Changed;
}

// --- End AlignVectors

auto HexagonVectorCombine::run() -> bool {
  if (!HST.useHVXOps())
    return false;

  bool Changed = AlignVectors(*this).run();
  return Changed;
}

auto HexagonVectorCombine::getIntTy() const -> IntegerType * {
  return Type::getInt32Ty(F.getContext());
}

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  if (ElemCount == 0)
    return ByteTy;
  return VectorType::get(ByteTy, ElemCount, /*Scalable*/ false);
}

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  if (ElemCount == 0)
    return BoolTy;
  return VectorType::get(BoolTy, ElemCount, /*Scalable*/ false);
}

auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * {
  return ConstantInt::getSigned(getIntTy(), Val);
}

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
  if (auto *C = dyn_cast<Constant>(Val))
    return C->isZeroValue();
  return false;
}

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> Optional<APInt> {
  if (auto *CI = dyn_cast<ConstantInt>(Val))
    return CI->getValue();
  return None;
}

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
  return isa<UndefValue>(Val);
}

auto HexagonVectorCombine::getSizeOf(const Value *Val) const -> int {
  return getSizeOf(Val->getType());
}

auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int {
  return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue();
}

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  // The actual type may be shorter than the HVX vector, so determine
  // the alignment based on subtarget info.
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();
}

auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
  assert(Ty->isIntOrIntVectorTy());
  auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
  return Zero;
}

auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
  assert(Ty->isIntOrIntVectorTy());
  auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
  return Minus1;
}

// Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
auto HexagonVectorCombine::insertb(IRBuilder<> &Builder, Value *Dst, Value *Src,
                                   int Start, int Length, int Where) const
    -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);
  assert(0 <= Start && Start + Length <= SrcLen);
  assert(0 <= Where && Where + Length <= DstLen);

  int P2Len = PowerOf2Ceil(SrcLen | DstLen);
  auto *Undef = UndefValue::get(getByteTy());
  Value *P2Src = vresize(Builder, Src, P2Len, Undef);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);

  SmallVector<int, 256> SMask(P2Len);
  for (int i = 0; i != P2Len; ++i) {
    // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
    // Otherwise, pick Dst[i].
    SMask[i] =
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
  }

  Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
  return vresize(Builder, P2Insert, DstLen, Undef);
}

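// A worked example for insertb (illustrative only): with Dst and Src both
// 8 bytes, Start=1, Length=2, Where=5, we get P2Len = 8 and the shuffle mask
// {0,1,2,3,4,9,10,7}. Indices below 8 select from P2Dst, while 9..10 select
// P2Src[1..2], so bytes 5..6 of the result come from Src and the rest from
// Dst.
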
auto HexagonVectorCombine::vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
                                    Value *Amt) const -> Value * {
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  assert(isSectorTy(Hi->getType()));
  if (isZero(Amt))
    return Hi;
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
                           VecLen);

  if (HST.isTypeForHVX(Hi->getType())) {
    int HwLen = HST.getVectorLength();
    assert(VecLen == HwLen && "Expecting an exact HVX type");
    Intrinsic::ID V6_vlalignb = HwLen == 64
                                    ? Intrinsic::hexagon_V6_vlalignb
                                    : Intrinsic::hexagon_V6_vlalignb_128B;
    return createHvxIntrinsic(Builder, V6_vlalignb, Hi->getType(),
                              {Hi, Lo, Amt});
  }

  if (VecLen == 4) {
    Value *Pair = concat(Builder, {Lo, Hi});
    Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32);
    Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
    return Builder.CreateBitCast(Trunc, Hi->getType());
  }
  if (VecLen == 8) {
    Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
    return vralignb(Builder, Lo, Hi, Sub);
  }
  llvm_unreachable("Unexpected vector length");
}

auto HexagonVectorCombine::vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
                                    Value *Amt) const -> Value * {
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  assert(isSectorTy(Lo->getType()));
  if (isZero(Amt))
    return Lo;
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    int HwLen = HST.getVectorLength();
    assert(VecLen == HwLen && "Expecting an exact HVX type");
    Intrinsic::ID V6_valignb = HwLen == 64 ? Intrinsic::hexagon_V6_valignb
                                           : Intrinsic::hexagon_V6_valignb_128B;
    return createHvxIntrinsic(Builder, V6_valignb, Lo->getType(),
                              {Hi, Lo, Amt});
  }

  if (VecLen == 4) {
    Value *Pair = concat(Builder, {Lo, Hi});
    Value *Shift = Builder.CreateLShr(Pair, Amt);
    Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
    return Builder.CreateBitCast(Trunc, Lo->getType());
  }
  if (VecLen == 8) {
    Type *Int64Ty = Type::getInt64Ty(F.getContext());
    Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty);
    Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty);
    Function *FI = Intrinsic::getDeclaration(F.getParent(),
                                             Intrinsic::hexagon_S2_valignrb);
    Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt});
    return Builder.CreateBitCast(Call, Lo->getType());
  }
  llvm_unreachable("Unexpected vector length");
}

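// Semantics recap (illustrative, derived from the constant-amount paths
// above): treating Lo:Hi as one double-length byte vector, vralignb(Lo, Hi,
// Amt) returns bytes [Amt, Amt+VecLen) of it, i.e. a right funnel shift by
// Amt bytes, while vlalignb returns bytes [VecLen-Amt, 2*VecLen-Amt), the
// left-aligned counterpart.
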
// Concatenates a sequence of vectors of the same type.
auto HexagonVectorCombine::concat(IRBuilder<> &Builder,
                                  ArrayRef<Value *> Vecs) const -> Value * {
  assert(!Vecs.empty());
  SmallVector<int, 256> SMask;
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;

  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
    int ElemCount = Ty->getElementCount().getFixedValue();
    SMask.resize(ElemCount * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
      Work[ThisW].push_back(UndefValue::get(Ty));
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
      Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
                                                  Work[ThisW][i + 1], SMask);
      Work[OtherW].push_back(Joined);
    }
    std::swap(ThisW, OtherW);
  }

  // Since there may have been some undefs appended to make shuffle operands
  // have the same type, perform the last shuffle to only pick the original
  // elements.
  SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);
  // After the final swap, Work[ThisW] holds the fully joined vector (the
  // original code read Work[OtherW], which is the previous round's list).
  Value *Total = Work[ThisW].front();
  return Builder.CreateShuffleVector(Total, SMask);
}

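// Usage sketch (illustrative only): concat(Builder, {a, b}) with two
// <4 x i8> inputs joins them pairwise with ever-wider shuffles, producing
// one <8 x i8> whose low half is a and high half is b. An odd input count is
// padded with undef and trimmed again by the final shuffle.
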
auto HexagonVectorCombine::vresize(IRBuilder<> &Builder, Value *Val,
                                   int NewSize, Value *Pad) const -> Value * {
  assert(isa<VectorType>(Val->getType()));
  auto *ValTy = cast<VectorType>(Val->getType());
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = ValTy->getElementCount().getFixedValue();
  if (CurSize == NewSize)
    return Val;
  // Truncate?
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, /*Unused*/ Val, 0, NewSize);
  // Extend.
  SmallVector<int, 128> SMask(NewSize);
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
  Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
  return Builder.CreateShuffleVector(Val, PadVec, SMask);
}

auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask,
                                   Type *FromTy, Type *ToTy) const -> Value * {
  // Mask is a vector <N x i1>, where each element corresponds to an
  // element of FromTy. Remap it so that each element will correspond
  // to an element of ToTy.
  assert(isa<VectorType>(Mask->getType()));

  Type *FromSTy = FromTy->getScalarType();
  Type *ToSTy = ToTy->getScalarType();
  if (FromSTy == ToSTy)
    return Mask;

  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

  auto *MaskTy = cast<VectorType>(Mask->getType());
  int FromCount = MaskTy->getElementCount().getFixedValue();
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);

  // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
  // -> trunc to <M x i1>.
  Value *Ext = Builder.CreateSExt(
      Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false));
  Value *Cast = Builder.CreateBitCast(
      Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false));
  return Builder.CreateTrunc(
      Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false));
}

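// A worked example for rescale (illustrative only): remapping an <8 x i1>
// mask from FromTy = <8 x i32> to ToTy = i8 gives FromSize = 4, ToSize = 1,
// so the mask is sign-extended to <8 x i32>, bitcast to <32 x i8>, and
// truncated to <32 x i1>: each original lane expands into 4 byte-sized mask
// lanes.
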
// Bitcast to bytes, and return least significant bits.
auto HexagonVectorCombine::vlsb(IRBuilder<> &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getBoolTy())
    return Val;

  Value *Bytes = vbytes(Builder, Val);
  if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
  // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
  // <1 x i1>.
  return Builder.CreateTrunc(Bytes, getBoolTy());
}

// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
auto HexagonVectorCombine::vbytes(IRBuilder<> &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getByteTy())
    return Val;

  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
  // For bool, return a sext from i1 to i8.
  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
  return Builder.CreateSExt(Val, getByteTy());
}

auto HexagonVectorCombine::createHvxIntrinsic(IRBuilder<> &Builder,
                                              Intrinsic::ID IntID, Type *RetTy,
                                              ArrayRef<Value *> Args) const
    -> Value * {
  int HwLen = HST.getVectorLength();
  Type *BoolTy = Type::getInt1Ty(F.getContext());
  Type *Int32Ty = Type::getInt32Ty(F.getContext());
  // HVX vector -> v16i32/v32i32
  // HVX vector predicate -> v512i1/v1024i1
  auto getTypeForIntrin = [&](Type *Ty) -> Type * {
    if (HST.isTypeForHVX(Ty, /*IncludeBool*/ true)) {
      Type *ElemTy = cast<VectorType>(Ty)->getElementType();
      if (ElemTy == Int32Ty)
        return Ty;
      if (ElemTy == BoolTy)
        return VectorType::get(BoolTy, 8 * HwLen, /*Scalable*/ false);
      return VectorType::get(Int32Ty, HwLen / 4, /*Scalable*/ false);
    }
    // Non-HVX type. It should be a scalar.
    assert(Ty == Int32Ty || Ty->isIntegerTy(64));
    return Ty;
  };

  auto getCast = [&](IRBuilder<> &Builder, Value *Val,
                     Type *DestTy) -> Value * {
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
      return Val;
    if (HST.isTypeForHVX(SrcTy, /*IncludeBool*/ true)) {
      if (cast<VectorType>(SrcTy)->getElementType() == BoolTy) {
        // This should take care of casts the other way too, for example
        // v1024i1 -> v32i1.
        Intrinsic::ID TC = HwLen == 64
                               ? Intrinsic::hexagon_V6_pred_typecast
                               : Intrinsic::hexagon_V6_pred_typecast_128B;
        Function *FI = Intrinsic::getDeclaration(F.getParent(), TC,
                                                 {DestTy, Val->getType()});
        return Builder.CreateCall(FI, {Val});
      }
      // Non-predicate HVX vector.
      return Builder.CreateBitCast(Val, DestTy);
    }
    // Non-HVX type. It should be a scalar, and it should already have
    // a valid type.
    llvm_unreachable("Unexpected type");
  };

  SmallVector<Value *, 4> IntOps;
  for (Value *A : Args)
    IntOps.push_back(getCast(Builder, A, getTypeForIntrin(A->getType())));
  Function *FI = Intrinsic::getDeclaration(F.getParent(), IntID);
  Value *Call = Builder.CreateCall(FI, IntOps);

  Type *CallTy = Call->getType();
  if (CallTy == RetTy)
    return Call;
  // Scalar types should have RetTy matching the call return type.
  assert(HST.isTypeForHVX(CallTy, /*IncludeBool*/ true));
  if (cast<VectorType>(CallTy)->getElementType() == BoolTy)
    return getCast(Builder, Call, RetTy);
  return Builder.CreateBitCast(Call, RetTy);
}

auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> Optional<int> {
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [&](Value *V) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
      if (Value *S = SimplifyInstruction(I, Q))
        return S;
    }
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return None;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return None;

  Builder B(Gep0->getParent());
  Value *BasePtr = Gep0->getPointerOperand();
  int Scale = DL.getTypeStoreSize(BasePtr->getType()->getPointerElementType());

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return None;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

  KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
  KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnesValue())
    return None;

  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
  } else {
    return None;
  }

  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
  } else {
    return None;
  }

  return (Diff0 + Diff1) * Scale;

#undef CallBuilder
}

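// A worked example of the known-bits split above (illustrative, assuming
// InstSimplify can fold each masked subtraction to a constant): for
// Idx0 = X | 8 and Idx1 = X, where bit 3 of X is known zero, the direct
// subtraction does not fold, but the unknown-bit parts (X & MaskU) cancel
// to Diff0 = 0, and the known-bit parts give Diff1 = 8 - 0 = 8, so the
// pointers differ by 8 * Scale bytes.
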
template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &Ignore) const
    -> bool {
  auto getLocOrNone = [this](const Instruction &I) -> Optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayBeMemoryDependent(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(Ignore, &I))
      continue;
    // The assume intrinsic can be ignored.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}

#ifndef NDEBUG
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}

auto HexagonVectorCombine::isSectorTy(Type *Ty) const -> bool {
  if (!isByteVecTy(Ty))
    return false;
  int Size = getSizeOf(Ty);
  if (HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HST.getVectorLength());
  return Size == 4 || Size == 8;
}
#endif

auto HexagonVectorCombine::getElementRange(IRBuilder<> &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && Start < Length);
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask);
}

// Pass management.

namespace llvm {
void initializeHexagonVectorCombineLegacyPass(PassRegistry &);
FunctionPass *createHexagonVectorCombineLegacyPass();
} // namespace llvm

namespace {
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
    return HVC.run();
  }
};
} // namespace

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}
@ A
Definition: ARMBaseInfo.h:34
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::DomTreeNodeBase::children
iterator_range< iterator > children()
Definition: GenericDomTree.h:83
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:235
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction
Definition: Instruction.h:45
llvm::SimplifyInstruction
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:5788
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:281
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:50
llvm::ConstantVector::getSplat
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1397
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1770
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:885
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::None
const NoneType None
Definition: None.h:23
llvm::mayBeMemoryDependent
bool mayBeMemoryDependent(const Instruction &I)
Returns true if the result or effects of the given instructions I depend on or influence global memor...
Definition: ValueTracking.cpp:4626
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:202
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:391
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:1485
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:303
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:88
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:446
llvm::pdb::Unknown
@ Unknown
Definition: PDBTypes.h:395
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:634
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1547
ArrayRef.h
llvm::concat
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&... Ranges)
Concatenated range across two or more ranges.
Definition: STLExtras.h:1011
TargetPassConfig.h
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition: ValueTracking.cpp:212
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:224
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1540
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
getLocation
static MemoryLocation getLocation(Instruction *I, AAResults *AA)
Definition: SLPVectorizer.cpp:515
llvm::MemoryLocation::getForArgument
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
Definition: MemoryLocation.cpp:147
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:70
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1463
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1489
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:57
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
Simplify
assume Assume Simplify
Definition: AssumeBundleBuilder.cpp:604
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1667
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HexagonVectorCombine.cpp:48
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::DomTreeNodeBase< BasicBlock >
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
CallBuilder
#define CallBuilder(B, F)
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:198
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:833
std
Definition: BitVector.h:838
llvm::KnownBits
Definition: KnownBits.h:23
llvm::copy_if
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1528
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:669
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:314
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:208
llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:899
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:709
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:207
llvm::pdb::DbgHeaderType::Max
@ Max
llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: TargetMachine.h:130
SmallVector.h
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1269
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
InstructionSimplify.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
shift
http eax xorl edx cl sete al setne dl sall eax sall edx But that requires good bit subreg support this might be better It s an extra shift
Definition: README.txt:30
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:42
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:93
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
raw_ostream.h
llvm::pdb::PDB_SymType::Block
@ Block
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:628
llvm::Optional::getValue
constexpr const T & getValue() const LLVM_LVALUE_FUNCTION
Definition: Optional.h:280
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:91
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38