//===-- HexagonVectorCombine.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// HexagonVectorCombine is a utility class implementing a variety of functions
// that assist in vector-based optimizations.
//
// AlignVectors: replace unaligned vector loads and stores with aligned ones.
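//
// A sketch of the transformation (illustrative IR only, not from an actual
// run): two 4-byte-aligned HVX loads from %p and %p+64 can be covered by
// three 64-byte-aligned "sector" loads of the enclosing region, after which
// the original values are recovered with valign, using the run-time byte
// offset of %p as the shift amount:
//   %b  = %p aligned down to 64 bytes
//   %s0 = load <64 x i8> at %b
//   %s1 = load <64 x i8> at %b+64
//   %s2 = load <64 x i8> at %b+128
//   %v0 = valign(%s1, %s0, %p)    ; original value at %p
//   %v1 = valign(%s2, %s1, %p)    ; original value at %p+64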
//===----------------------------------------------------------------------===//

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/Metadata.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#include "HexagonSubtarget.h"
#include "HexagonTargetMachine.h"

#include <algorithm>
#include <deque>
#include <map>
#include <numeric>
#include <set>
#include <utility>
#include <vector>

#define DEBUG_TYPE "hexagon-vc"

using namespace llvm;

namespace {
class HexagonVectorCombine {
public:
  HexagonVectorCombine(Function &F_, AliasAnalysis &AA_, AssumptionCache &AC_,
                       DominatorTree &DT_, TargetLibraryInfo &TLI_,
                       const TargetMachine &TM_)
      : F(F_), DL(F.getParent()->getDataLayout()), AA(AA_), AC(AC_), DT(DT_),
        TLI(TLI_),
        HST(static_cast<const HexagonSubtarget &>(*TM_.getSubtargetImpl(F))) {}

  bool run();

  // Common integer type.
  IntegerType *getIntTy() const;
  // Byte type: either scalar (when ElemCount = 0), or vector with the
  // given element count.
  Type *getByteTy(int ElemCount = 0) const;
  // Boolean type: either scalar (when ElemCount = 0), or vector with the
  // given element count.
  Type *getBoolTy(int ElemCount = 0) const;
  // Create a ConstantInt of type returned by getIntTy with the value Val.
  ConstantInt *getConstInt(int Val) const;
  // Get the integer value of Val, if it exists.
  Optional<APInt> getIntValue(const Value *Val) const;
  // Is Val a constant 0, or a vector of 0s?
  bool isZero(const Value *Val) const;
  // Is Val an undef value?
  bool isUndef(const Value *Val) const;

  int getSizeOf(const Value *Val) const;
  int getSizeOf(const Type *Ty) const;
  int getAllocSizeOf(const Type *Ty) const;
  int getTypeAlignment(Type *Ty) const;

  VectorType *getByteVectorTy(int ScLen) const;
  Constant *getNullValue(Type *Ty) const;
  Constant *getFullValue(Type *Ty) const;

  Value *insertb(IRBuilder<> &Builder, Value *Dest, Value *Src, int Start,
                 int Length, int Where) const;
  Value *vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
  Value *vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi, Value *Amt) const;
  Value *concat(IRBuilder<> &Builder, ArrayRef<Value *> Vecs) const;
  Value *vresize(IRBuilder<> &Builder, Value *Val, int NewSize,
                 Value *Pad) const;
  Value *rescale(IRBuilder<> &Builder, Value *Mask, Type *FromTy,
                 Type *ToTy) const;
  Value *vlsb(IRBuilder<> &Builder, Value *Val) const;
  Value *vbytes(IRBuilder<> &Builder, Value *Val) const;

  Value *createHvxIntrinsic(IRBuilder<> &Builder, Intrinsic::ID IntID,
                            Type *RetTy, ArrayRef<Value *> Args) const;

  Optional<int> calculatePointerDifference(Value *Ptr0, Value *Ptr1) const;

  template <typename T = std::vector<Instruction *>>
  bool isSafeToMoveBeforeInBB(const Instruction &In,
                              BasicBlock::const_iterator To,
                              const T &Ignore = {}) const;

  Function &F;
  const DataLayout &DL;
  AliasAnalysis &AA;
  AssumptionCache &AC;
  DominatorTree &DT;
  TargetLibraryInfo &TLI;
  const HexagonSubtarget &HST;

private:
#ifndef NDEBUG
  // These two functions are only used for assertions at the moment.
  bool isByteVecTy(Type *Ty) const;
  bool isSectorTy(Type *Ty) const;
#endif
  Value *getElementRange(IRBuilder<> &Builder, Value *Lo, Value *Hi, int Start,
                         int Length) const;
};

class AlignVectors {
public:
  AlignVectors(HexagonVectorCombine &HVC_) : HVC(HVC_) {}

  bool run();

private:
  using InstList = std::vector<Instruction *>;

  struct Segment {
    void *Data;
    int Start;
    int Size;
  };

  struct AddrInfo {
    AddrInfo(const AddrInfo &) = default;
    AddrInfo(const HexagonVectorCombine &HVC, Instruction *I, Value *A, Type *T,
             Align H)
        : Inst(I), Addr(A), ValTy(T), HaveAlign(H),
          NeedAlign(HVC.getTypeAlignment(ValTy)) {}

    // XXX: add Size member?
    Instruction *Inst;
    Value *Addr;
    Type *ValTy;
    Align HaveAlign;
    Align NeedAlign;
    int Offset = 0; // Offset (in bytes) from the first member of the
                    // containing AddrList.
  };
  using AddrList = std::vector<AddrInfo>;

  struct InstrLess {
    bool operator()(const Instruction *A, const Instruction *B) const {
      return A->comesBefore(B);
    }
  };
  using DepList = std::set<Instruction *, InstrLess>;

  struct MoveGroup {
    MoveGroup(const AddrInfo &AI, Instruction *B, bool Hvx, bool Load)
        : Base(B), Main{AI.Inst}, IsHvx(Hvx), IsLoad(Load) {}
    Instruction *Base; // Base instruction of the parent address group.
    InstList Main;     // Main group of instructions.
    InstList Deps;     // List of dependencies.
    bool IsHvx;        // Is this a group of HVX instructions?
    bool IsLoad;       // Is this a load group?
  };
  using MoveList = std::vector<MoveGroup>;

  struct ByteSpan {
    struct Segment {
      // Segment of a Value: 'Len' bytes starting at byte 'Begin'.
      Segment(Value *Val, int Begin, int Len)
          : Val(Val), Start(Begin), Size(Len) {}
      Segment(const Segment &Seg) = default;
      Value *Val; // Value representable as a sequence of bytes.
      int Start;  // First byte of the value that belongs to the segment.
      int Size;   // Number of bytes in the segment.
    };

    struct Block {
      Block(Value *Val, int Len, int Pos) : Seg(Val, 0, Len), Pos(Pos) {}
      Block(Value *Val, int Off, int Len, int Pos)
          : Seg(Val, Off, Len), Pos(Pos) {}
      Block(const Block &Blk) = default;
      Segment Seg; // Value segment.
      int Pos;     // Position (offset) of the segment in the enclosing span.
    };
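    // For example, a Block{Seg{V, 2, 4}, 10} says that bytes 2..5 of value
    // V occupy byte positions 10..13 of the enclosing span.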

    int extent() const;
    ByteSpan section(int Start, int Length) const;
    ByteSpan &shift(int Offset);
    SmallVector<Value *, 8> values() const;

    int size() const { return Blocks.size(); }
    Block &operator[](int i) { return Blocks[i]; }

    std::vector<Block> Blocks;

    using iterator = decltype(Blocks)::iterator;
    iterator begin() { return Blocks.begin(); }
    iterator end() { return Blocks.end(); }
    using const_iterator = decltype(Blocks)::const_iterator;
    const_iterator begin() const { return Blocks.begin(); }
    const_iterator end() const { return Blocks.end(); }
  };

  Align getAlignFromValue(const Value *V) const;
  Optional<AddrInfo> getAddrInfo(Instruction &In) const;
  bool isHvx(const AddrInfo &AI) const;

  Value *getPayload(Value *Val) const;
  Value *getMask(Value *Val) const;
  Value *getPassThrough(Value *Val) const;

  Value *createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
                               int Adjust) const;
  Value *createAlignedPointer(IRBuilder<> &Builder, Value *Ptr, Type *ValTy,
                              int Alignment) const;
  Value *createAlignedLoad(IRBuilder<> &Builder, Type *ValTy, Value *Ptr,
                           int Alignment, Value *Mask, Value *PassThru) const;
  Value *createAlignedStore(IRBuilder<> &Builder, Value *Val, Value *Ptr,
                            int Alignment, Value *Mask) const;

  bool createAddressGroups();
  MoveList createLoadGroups(const AddrList &Group) const;
  MoveList createStoreGroups(const AddrList &Group) const;
  bool move(const MoveGroup &Move) const;
  bool realignGroup(const MoveGroup &Move) const;

  friend raw_ostream &operator<<(raw_ostream &OS, const AddrInfo &AI);
  friend raw_ostream &operator<<(raw_ostream &OS, const MoveGroup &MG);
  friend raw_ostream &operator<<(raw_ostream &OS, const ByteSpan &BS);

  std::map<Instruction *, AddrList> AddrGroups;
  HexagonVectorCombine &HVC;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::AddrInfo &AI) {
  OS << "Inst: " << AI.Inst << "  " << *AI.Inst << '\n';
  OS << "Addr: " << *AI.Addr << '\n';
  OS << "Type: " << *AI.ValTy << '\n';
  OS << "HaveAlign: " << AI.HaveAlign.value() << '\n';
  OS << "NeedAlign: " << AI.NeedAlign.value() << '\n';
  OS << "Offset: " << AI.Offset;
  return OS;
}

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::MoveGroup &MG) {
  OS << "Main\n";
  for (Instruction *I : MG.Main)
    OS << "  " << *I << '\n';
  OS << "Deps\n";
  for (Instruction *I : MG.Deps)
    OS << "  " << *I << '\n';
  return OS;
}

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &OS, const AlignVectors::ByteSpan &BS) {
  OS << "ByteSpan[size=" << BS.size() << ", extent=" << BS.extent() << '\n';
  for (const AlignVectors::ByteSpan::Block &B : BS) {
    OS << "  @" << B.Pos << " [" << B.Seg.Start << ',' << B.Seg.Size << "] "
       << *B.Seg.Val << '\n';
  }
  OS << ']';
  return OS;
}

} // namespace

namespace {

template <typename T> T *getIfUnordered(T *MaybeT) {
  return MaybeT && MaybeT->isUnordered() ? MaybeT : nullptr;
}
template <typename T> T *isCandidate(Instruction *In) {
  return dyn_cast<T>(In);
}
template <> LoadInst *isCandidate<LoadInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<LoadInst>(In));
}
template <> StoreInst *isCandidate<StoreInst>(Instruction *In) {
  return getIfUnordered(dyn_cast<StoreInst>(In));
}

#if !defined(_MSC_VER) || _MSC_VER >= 1926
// VS2017 and some versions of VS2019 have trouble compiling this:
// error C2976: 'std::map': too few template arguments
// VS 2019 16.x is known to work, except for 16.4/16.5 (MSC_VER 1924/1925)
template <typename Pred, typename... Ts>
void erase_if(std::map<Ts...> &map, Pred p)
#else
template <typename Pred, typename T, typename U>
void erase_if(std::map<T, U> &map, Pred p)
#endif
{
  for (auto i = map.begin(), e = map.end(); i != e;) {
    if (p(*i))
      i = map.erase(i);
    else
      i = std::next(i);
  }
}

// Forward other erase_ifs to the LLVM implementations.
template <typename Pred, typename T> void erase_if(T &&container, Pred p) {
  llvm::erase_if(std::forward<T>(container), p);
}

} // namespace

// --- Begin AlignVectors

auto AlignVectors::ByteSpan::extent() const -> int {
  if (size() == 0)
    return 0;
  int Min = Blocks[0].Pos;
  int Max = Blocks[0].Pos + Blocks[0].Seg.Size;
  for (int i = 1, e = size(); i != e; ++i) {
    Min = std::min(Min, Blocks[i].Pos);
    Max = std::max(Max, Blocks[i].Pos + Blocks[i].Seg.Size);
  }
  return Max - Min;
}

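// For example (illustrative), for a span {@0 [0,4] V0, @4 [0,8] V1},
// section(2, 6) keeps only bytes [2,8) of the span and returns
// {@2 [2,2] V0, @4 [0,4] V1}: the last 2 bytes of V0 and the first
// 4 bytes of V1.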
auto AlignVectors::ByteSpan::section(int Start, int Length) const -> ByteSpan {
  ByteSpan Section;
  for (const ByteSpan::Block &B : Blocks) {
    int L = std::max(B.Pos, Start);                       // Left end.
    int R = std::min(B.Pos + B.Seg.Size, Start + Length); // Right end+1.
    if (L < R) {
      // How much to chop off the beginning of the segment:
      int Off = L > B.Pos ? L - B.Pos : 0;
      Section.Blocks.emplace_back(B.Seg.Val, B.Seg.Start + Off, R - L, L);
    }
  }
  return Section;
}

auto AlignVectors::ByteSpan::shift(int Offset) -> ByteSpan & {
  for (Block &B : Blocks)
    B.Pos += Offset;
  return *this;
}

auto AlignVectors::ByteSpan::values() const -> SmallVector<Value *, 8> {
  SmallVector<Value *, 8> Values(Blocks.size());
  for (int i = 0, e = Blocks.size(); i != e; ++i)
    Values[i] = Blocks[i].Seg.Val;
  return Values;
}

auto AlignVectors::getAlignFromValue(const Value *V) const -> Align {
  const auto *C = dyn_cast<ConstantInt>(V);
  assert(C && "Alignment must be a compile-time constant integer");
  return C->getAlignValue();
}

auto AlignVectors::getAddrInfo(Instruction &In) const -> Optional<AddrInfo> {
  if (auto *L = isCandidate<LoadInst>(&In))
    return AddrInfo(HVC, L, L->getPointerOperand(), L->getType(),
                    L->getAlign());
  if (auto *S = isCandidate<StoreInst>(&In))
    return AddrInfo(HVC, S, S->getPointerOperand(),
                    S->getValueOperand()->getType(), S->getAlign());
  if (auto *II = isCandidate<IntrinsicInst>(&In)) {
    Intrinsic::ID ID = II->getIntrinsicID();
    switch (ID) {
    case Intrinsic::masked_load:
      return AddrInfo(HVC, II, II->getArgOperand(0), II->getType(),
                      getAlignFromValue(II->getArgOperand(1)));
    case Intrinsic::masked_store:
      return AddrInfo(HVC, II, II->getArgOperand(1),
                      II->getArgOperand(0)->getType(),
                      getAlignFromValue(II->getArgOperand(2)));
    }
  }
  return Optional<AddrInfo>();
}

auto AlignVectors::isHvx(const AddrInfo &AI) const -> bool {
  return HVC.HST.isTypeForHVX(AI.ValTy);
}

auto AlignVectors::getPayload(Value *Val) const -> Value * {
  if (auto *In = dyn_cast<Instruction>(Val)) {
    Intrinsic::ID ID = 0;
    if (auto *II = dyn_cast<IntrinsicInst>(In))
      ID = II->getIntrinsicID();
    if (isa<StoreInst>(In) || ID == Intrinsic::masked_store)
      return In->getOperand(0);
  }
  return Val;
}

auto AlignVectors::getMask(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::masked_load:
      return II->getArgOperand(2);
    case Intrinsic::masked_store:
      return II->getArgOperand(3);
    }
  }

  Type *ValTy = getPayload(Val)->getType();
  if (auto *VecTy = dyn_cast<VectorType>(ValTy)) {
    int ElemCount = VecTy->getElementCount().getFixedValue();
    return HVC.getFullValue(HVC.getBoolTy(ElemCount));
  }
  return HVC.getFullValue(HVC.getBoolTy());
}

auto AlignVectors::getPassThrough(Value *Val) const -> Value * {
  if (auto *II = dyn_cast<IntrinsicInst>(Val)) {
    if (II->getIntrinsicID() == Intrinsic::masked_load)
      return II->getArgOperand(3);
  }
  return UndefValue::get(getPayload(Val)->getType());
}

auto AlignVectors::createAdjustedPointer(IRBuilder<> &Builder, Value *Ptr,
                                         Type *ValTy, int Adjust) const
    -> Value * {
  // The adjustment is in bytes, but if it's a multiple of the type size,
  // we don't need to do pointer casts.
  auto *PtrTy = cast<PointerType>(Ptr->getType());
  if (!PtrTy->isOpaque()) {
    Type *ElemTy = PtrTy->getElementType();
    int ElemSize = HVC.getAllocSizeOf(ElemTy);
    if (Adjust % ElemSize == 0 && Adjust != 0) {
      Value *Tmp0 =
          Builder.CreateGEP(ElemTy, Ptr, HVC.getConstInt(Adjust / ElemSize));
      return Builder.CreatePointerCast(Tmp0, ValTy->getPointerTo());
    }
  }

  PointerType *CharPtrTy = Type::getInt8PtrTy(HVC.F.getContext());
  Value *Tmp0 = Builder.CreatePointerCast(Ptr, CharPtrTy);
  Value *Tmp1 = Builder.CreateGEP(Type::getInt8Ty(HVC.F.getContext()), Tmp0,
                                  HVC.getConstInt(Adjust));
  return Builder.CreatePointerCast(Tmp1, ValTy->getPointerTo());
}

auto AlignVectors::createAlignedPointer(IRBuilder<> &Builder, Value *Ptr,
                                        Type *ValTy, int Alignment) const
    -> Value * {
  Value *AsInt = Builder.CreatePtrToInt(Ptr, HVC.getIntTy());
  Value *Mask = HVC.getConstInt(-Alignment);
  Value *And = Builder.CreateAnd(AsInt, Mask);
  return Builder.CreateIntToPtr(And, ValTy->getPointerTo());
}

auto AlignVectors::createAlignedLoad(IRBuilder<> &Builder, Type *ValTy,
                                     Value *Ptr, int Alignment, Value *Mask,
                                     Value *PassThru) const -> Value * {
  assert(!HVC.isUndef(Mask)); // Should this be allowed?
  if (HVC.isZero(Mask))
    return PassThru;
  if (Mask == ConstantInt::getTrue(Mask->getType()))
    return Builder.CreateAlignedLoad(ValTy, Ptr, Align(Alignment));
  return Builder.CreateMaskedLoad(ValTy, Ptr, Align(Alignment), Mask, PassThru);
}

auto AlignVectors::createAlignedStore(IRBuilder<> &Builder, Value *Val,
                                      Value *Ptr, int Alignment,
                                      Value *Mask) const -> Value * {
  if (HVC.isZero(Mask) || HVC.isUndef(Val) || HVC.isUndef(Mask))
    return UndefValue::get(Val->getType());
  if (Mask == ConstantInt::getTrue(Mask->getType()))
    return Builder.CreateAlignedStore(Val, Ptr, Align(Alignment));
  return Builder.CreateMaskedStore(Val, Ptr, Align(Alignment), Mask);
}

auto AlignVectors::createAddressGroups() -> bool {
  // An address group created here may contain instructions spanning
  // multiple basic blocks.
  AddrList WorkStack;

  auto findBaseAndOffset = [&](AddrInfo &AI) -> std::pair<Instruction *, int> {
    for (AddrInfo &W : WorkStack) {
      if (auto D = HVC.calculatePointerDifference(AI.Addr, W.Addr))
        return std::make_pair(W.Inst, *D);
    }
    return std::make_pair(nullptr, 0);
  };

  auto traverseBlock = [&](DomTreeNode *DomN, auto Visit) -> void {
    BasicBlock &Block = *DomN->getBlock();
    for (Instruction &I : Block) {
      auto AI = this->getAddrInfo(I); // Use this-> for gcc6.
      if (!AI)
        continue;
      auto F = findBaseAndOffset(*AI);
      Instruction *GroupInst;
      if (Instruction *BI = F.first) {
        AI->Offset = F.second;
        GroupInst = BI;
      } else {
        WorkStack.push_back(*AI);
        GroupInst = AI->Inst;
      }
      AddrGroups[GroupInst].push_back(*AI);
    }

    for (DomTreeNode *C : DomN->children())
      Visit(C, Visit);

    while (!WorkStack.empty() && WorkStack.back().Inst->getParent() == &Block)
      WorkStack.pop_back();
  };

  traverseBlock(HVC.DT.getRootNode(), traverseBlock);
  assert(WorkStack.empty());

  // AddrGroups are formed.

  // Remove groups of size 1.
  erase_if(AddrGroups, [](auto &G) { return G.second.size() == 1; });
  // Remove groups that don't use HVX types.
  erase_if(AddrGroups, [&](auto &G) {
    return !llvm::any_of(
        G.second, [&](auto &I) { return HVC.HST.isTypeForHVX(I.ValTy); });
  });

  return !AddrGroups.empty();
}

auto AlignVectors::createLoadGroups(const AddrList &Group) const -> MoveList {
  // Form load groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.

  auto getUpwardDeps = [](Instruction *In, Instruction *Base) {
    BasicBlock *Parent = Base->getParent();
    assert(In->getParent() == Parent &&
           "Base and In should be in the same block");
    assert(Base->comesBefore(In) && "Base should come before In");

    DepList Deps;
    std::deque<Instruction *> WorkQ = {In};
    while (!WorkQ.empty()) {
      Instruction *D = WorkQ.front();
      WorkQ.pop_front();
      Deps.insert(D);
      for (Value *Op : D->operands()) {
        if (auto *I = dyn_cast<Instruction>(Op)) {
          if (I->getParent() == Parent && Base->comesBefore(I))
            WorkQ.push_back(I);
        }
      }
    }
    return Deps;
  };

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // Leading instruction in the load group.
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;

    auto isSafeToMoveToBase = [&](const Instruction *I) {
      return HVC.isSafeToMoveBeforeInBB(*I, Base->getIterator());
    };
    DepList Deps = getUpwardDeps(Info.Inst, Base);
    if (!llvm::all_of(Deps, isSafeToMoveToBase))
      return false;

    // The dependencies will be moved together with the load, so make sure
    // that none of them could be moved independently in another group.
    Deps.erase(Info.Inst);
    auto inAddrMap = [&](Instruction *I) { return AddrGroups.count(I) > 0; };
    if (llvm::any_of(Deps, inAddrMap))
      return false;
    Move.Main.push_back(Info.Inst);
    llvm::append_range(Move.Deps, Deps);
    return true;
  };

  MoveList LoadGroups;

  for (const AddrInfo &Info : Group) {
    if (!Info.Inst->mayReadFromMemory())
      continue;
    if (LoadGroups.empty() || !tryAddTo(Info, LoadGroups.back()))
      LoadGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), true);
  }

  // Erase singleton groups.
  erase_if(LoadGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  return LoadGroups;
}

auto AlignVectors::createStoreGroups(const AddrList &Group) const -> MoveList {
  // Form store groups.
  // To avoid complications with moving code across basic blocks, only form
  // groups that are contained within a single basic block.

  auto tryAddTo = [&](const AddrInfo &Info, MoveGroup &Move) {
    assert(!Move.Main.empty() && "Move group should have non-empty Main");
    // For stores with return values we'd have to collect downward
    // dependencies. There are no such stores that we handle at the moment,
    // so omit that.
    assert(Info.Inst->getType()->isVoidTy() &&
           "Not handling stores with return values");
    // Don't mix HVX and non-HVX instructions.
    if (Move.IsHvx != isHvx(Info))
      return false;
    // For stores we need to be careful whether it's safe to move them.
    // Stores that are otherwise safe to move together may not appear safe
    // to move over one another (i.e. isSafeToMoveBefore may return false).
    Instruction *Base = Move.Main.front();
    if (Base->getParent() != Info.Inst->getParent())
      return false;
    if (!HVC.isSafeToMoveBeforeInBB(*Info.Inst, Base->getIterator(), Move.Main))
      return false;
    Move.Main.push_back(Info.Inst);
    return true;
  };

  MoveList StoreGroups;

  for (auto I = Group.rbegin(), E = Group.rend(); I != E; ++I) {
    const AddrInfo &Info = *I;
    if (!Info.Inst->mayWriteToMemory())
      continue;
    if (StoreGroups.empty() || !tryAddTo(Info, StoreGroups.back()))
      StoreGroups.emplace_back(Info, Group.front().Inst, isHvx(Info), false);
  }

  // Erase singleton groups.
  erase_if(StoreGroups, [](const MoveGroup &G) { return G.Main.size() <= 1; });
  return StoreGroups;
}

auto AlignVectors::move(const MoveGroup &Move) const -> bool {
  assert(!Move.Main.empty() && "Move group should have non-empty Main");
  Instruction *Where = Move.Main.front();

  if (Move.IsLoad) {
    // Move all deps to before Where, keeping order.
    for (Instruction *D : Move.Deps)
      D->moveBefore(Where);
    // Move all main instructions to after Where, keeping order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveAfter(Where);
      Where = M;
    }
  } else {
    // NOTE: Deps are empty for "store" groups. If they need to be
    // non-empty, decide on the order.
    assert(Move.Deps.empty());
    // Move all main instructions to before Where, inverting order.
    ArrayRef<Instruction *> Main(Move.Main);
    for (Instruction *M : Main.drop_front(1)) {
      M->moveBefore(Where);
      Where = M;
    }
  }

  return Move.Main.size() + Move.Deps.size() > 1;
}

auto AlignVectors::realignGroup(const MoveGroup &Move) const -> bool {
  // TODO: Needs support for masked loads/stores of "scalar" vectors.
  if (!Move.IsHvx)
    return false;

  // Return the element from Range that maximizes GetValue(element),
  // where GetValue obtains the value to compare from an element.
  auto getMaxOf = [](auto Range, auto GetValue) {
    return *std::max_element(
        Range.begin(), Range.end(),
        [&GetValue](auto &A, auto &B) { return GetValue(A) < GetValue(B); });
  };

  const AddrList &BaseInfos = AddrGroups.at(Move.Base);

  // Conceptually, there is a vector of N bytes covering the addresses
  // starting from the minimum offset (i.e. Base.Addr+Start). This vector
  // represents a contiguous memory region that spans all accessed memory
  // locations.
  // The correspondence between loaded or stored values will be expressed
  // in terms of this vector. For example, the 0th element of the vector
  // from the Base address info will start at byte Start from the beginning
  // of this conceptual vector.
  //
  // This vector will be loaded/stored starting at the nearest down-aligned
  // address, and the amount of the down-alignment will be AlignVal:
  //   valign(load_vector(align_down(Base+Start)), AlignVal)
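  //
  // A small worked example with made-up numbers: for 32-byte accesses at
  // offsets -1, 31 and 63 from Base, Start = -1 and the conceptual vector
  // covers bytes Base-1 .. Base+94. It is then accessed as sectors starting
  // at align_down(Base-1), and each sector is realigned by
  // AlignVal = (Base-1) - align_down(Base-1) bytes.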

  std::set<Instruction *> TestSet(Move.Main.begin(), Move.Main.end());
  AddrList MoveInfos;
  llvm::copy_if(
      BaseInfos, std::back_inserter(MoveInfos),
      [&TestSet](const AddrInfo &AI) { return TestSet.count(AI.Inst); });

  // Maximum alignment present in the whole address group.
  const AddrInfo &WithMaxAlign =
      getMaxOf(BaseInfos, [](const AddrInfo &AI) { return AI.HaveAlign; });
  Align MaxGiven = WithMaxAlign.HaveAlign;

  // Element with the minimum offset in the move address group.
  const AddrInfo &WithMinOffset =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return -AI.Offset; });

  const AddrInfo &WithMaxNeeded =
      getMaxOf(MoveInfos, [](const AddrInfo &AI) { return AI.NeedAlign; });
  Align MinNeeded = WithMaxNeeded.NeedAlign;

  // Set the builder at the top instruction in the move group.
  Instruction *TopIn = Move.IsLoad ? Move.Main.front() : Move.Main.back();
  IRBuilder<> Builder(TopIn);
  Value *AlignAddr = nullptr; // Actual aligned address.
  Value *AlignVal = nullptr;  // Right-shift amount (for valign).

  if (MinNeeded <= MaxGiven) {
    int Start = WithMinOffset.Offset;
    int OffAtMax = WithMaxAlign.Offset;
    // Shift the offset of the maximally aligned instruction (OffAtMax)
    // back by just enough multiples of the required alignment to cover the
    // distance from Start to OffAtMax.
    // Calculate the address adjustment amount based on the address with the
    // maximum alignment. This is to allow a simple gep instruction instead
    // of potential bitcasts to i8*.
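    // For example, with made-up values Start = -1, OffAtMax = 0 and
    // MinNeeded = 8: Adjust = -alignTo(1, 8) = -8, so AlignAddr lands 8
    // bytes below the maximally aligned address, and Diff = -1 - (0 - 8) = 7,
    // which satisfies 0 <= Diff < MinNeeded.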
    int Adjust = -alignTo(OffAtMax - Start, MinNeeded.value());
    AlignAddr = createAdjustedPointer(Builder, WithMaxAlign.Addr,
                                      WithMaxAlign.ValTy, Adjust);
    int Diff = Start - (OffAtMax + Adjust);
    AlignVal = HVC.getConstInt(Diff);
    // Sanity.
    assert(Diff >= 0);
    assert(static_cast<decltype(MinNeeded.value())>(Diff) < MinNeeded.value());
  } else {
    // WithMinOffset is the lowest address in the group,
    // WithMinOffset.Addr = Base+Start.
    // Align instructions for both HVX (V6_valign) and scalar (S2_valignrb)
    // mask off unnecessary bits, so it's ok to just use the original pointer
    // as the alignment amount.
    // Do an explicit down-alignment of the address to avoid creating an
    // aligned instruction with an address that is not really aligned.
    AlignAddr = createAlignedPointer(Builder, WithMinOffset.Addr,
                                     WithMinOffset.ValTy, MinNeeded.value());
    AlignVal = Builder.CreatePtrToInt(WithMinOffset.Addr, HVC.getIntTy());
  }

  ByteSpan VSpan;
  for (const AddrInfo &AI : MoveInfos) {
    VSpan.Blocks.emplace_back(AI.Inst, HVC.getSizeOf(AI.ValTy),
                              AI.Offset - WithMinOffset.Offset);
  }

  // The aligned loads/stores will use blocks that are either scalars,
  // or HVX vectors. Let "sector" be the unified term for such a block.
  // blend(scalar, vector) -> sector...
  int ScLen = Move.IsHvx ? HVC.HST.getVectorLength()
                         : std::max<int>(MinNeeded.value(), 4);
  assert(!Move.IsHvx || ScLen == 64 || ScLen == 128);
  assert(Move.IsHvx || ScLen == 4 || ScLen == 8);

  Type *SecTy = HVC.getByteTy(ScLen);
  int NumSectors = (VSpan.extent() + ScLen - 1) / ScLen;
  bool DoAlign = !HVC.isZero(AlignVal);

  if (Move.IsLoad) {
    ByteSpan ASpan;
    auto *True = HVC.getFullValue(HVC.getBoolTy(ScLen));
    auto *Undef = UndefValue::get(SecTy);

    for (int i = 0; i != NumSectors + DoAlign; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      // FIXME: generate a predicated load?
      Value *Load = createAlignedLoad(Builder, SecTy, Ptr, ScLen, True, Undef);
      // If vector shifting is potentially needed, accumulate metadata
      // from source sections of twice the load width.
      int Start = (i - DoAlign) * ScLen;
      int Width = (1 + DoAlign) * ScLen;
      propagateMetadata(cast<Instruction>(Load),
                        VSpan.section(Start, Width).values());
      ASpan.Blocks.emplace_back(Load, ScLen, i * ScLen);
    }

    if (DoAlign) {
      for (int j = 0; j != NumSectors; ++j) {
        ASpan[j].Seg.Val = HVC.vralignb(Builder, ASpan[j].Seg.Val,
                                        ASpan[j + 1].Seg.Val, AlignVal);
      }
    }

    for (ByteSpan::Block &B : VSpan) {
      ByteSpan ASection = ASpan.section(B.Pos, B.Seg.Size).shift(-B.Pos);
      Value *Accum = UndefValue::get(HVC.getByteTy(B.Seg.Size));
      for (ByteSpan::Block &S : ASection) {
        Value *Pay = HVC.vbytes(Builder, getPayload(S.Seg.Val));
        Accum =
            HVC.insertb(Builder, Accum, Pay, S.Seg.Start, S.Seg.Size, S.Pos);
      }
      // Instead of casting everything to bytes for the vselect, cast to the
      // original value type. This will avoid complications with casting masks.
      // For example, in cases when the original mask applied to i32, it could
      // be converted to a mask applicable to i8 via pred_typecast intrinsic,
      // but if the mask is not exactly of HVX length, extra handling would be
      // needed to make it work.
      Type *ValTy = getPayload(B.Seg.Val)->getType();
      Value *Cast = Builder.CreateBitCast(Accum, ValTy);
      Value *Sel = Builder.CreateSelect(getMask(B.Seg.Val), Cast,
                                        getPassThrough(B.Seg.Val));
      B.Seg.Val->replaceAllUsesWith(Sel);
    }
  } else {
    // Stores.
    ByteSpan ASpanV, ASpanM;

    // Return a vector value corresponding to the input value Val:
    // either <1 x Val> for scalar Val, or Val itself for vector Val.
    auto MakeVec = [](IRBuilder<> &Builder, Value *Val) -> Value * {
      Type *Ty = Val->getType();
      if (Ty->isVectorTy())
        return Val;
      auto *VecTy = VectorType::get(Ty, 1, /*Scalable*/ false);
      return Builder.CreateBitCast(Val, VecTy);
    };

    // Create an extra "undef" sector at the beginning and at the end.
    // They will be used as the left/right filler in the vlalign step.
    for (int i = (DoAlign ? -1 : 0); i != NumSectors + DoAlign; ++i) {
      // For stores, the size of each section is an aligned vector length.
      // Adjust the store offsets relative to the section start offset.
      ByteSpan VSection = VSpan.section(i * ScLen, ScLen).shift(-i * ScLen);
      Value *AccumV = UndefValue::get(SecTy);
      Value *AccumM = HVC.getNullValue(SecTy);
      for (ByteSpan::Block &S : VSection) {
        Value *Pay = getPayload(S.Seg.Val);
        Value *Mask = HVC.rescale(Builder, MakeVec(Builder, getMask(S.Seg.Val)),
                                  Pay->getType(), HVC.getByteTy());
        AccumM = HVC.insertb(Builder, AccumM, HVC.vbytes(Builder, Mask),
                             S.Seg.Start, S.Seg.Size, S.Pos);
        AccumV = HVC.insertb(Builder, AccumV, HVC.vbytes(Builder, Pay),
                             S.Seg.Start, S.Seg.Size, S.Pos);
      }
      ASpanV.Blocks.emplace_back(AccumV, ScLen, i * ScLen);
      ASpanM.Blocks.emplace_back(AccumM, ScLen, i * ScLen);
    }

    // vlalign
    if (DoAlign) {
      for (int j = 1; j != NumSectors + 2; ++j) {
        ASpanV[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanV[j - 1].Seg.Val,
                                             ASpanV[j].Seg.Val, AlignVal);
        ASpanM[j - 1].Seg.Val = HVC.vlalignb(Builder, ASpanM[j - 1].Seg.Val,
                                             ASpanM[j].Seg.Val, AlignVal);
      }
    }

    for (int i = 0; i != NumSectors + DoAlign; ++i) {
      Value *Ptr = createAdjustedPointer(Builder, AlignAddr, SecTy, i * ScLen);
      Value *Val = ASpanV[i].Seg.Val;
      Value *Mask = ASpanM[i].Seg.Val; // bytes
      if (!HVC.isUndef(Val) && !HVC.isZero(Mask)) {
        Value *Store = createAlignedStore(Builder, Val, Ptr, ScLen,
                                          HVC.vlsb(Builder, Mask));
        // If vector shifting is potentially needed, accumulate metadata
        // from source sections of twice the store width.
        int Start = (i - DoAlign) * ScLen;
        int Width = (1 + DoAlign) * ScLen;
        propagateMetadata(cast<Instruction>(Store),
                          VSpan.section(Start, Width).values());
      }
    }
  }

  for (auto *Inst : Move.Main)
    Inst->eraseFromParent();

  return true;
}

auto AlignVectors::run() -> bool {
  if (!createAddressGroups())
    return false;

  bool Changed = false;
  MoveList LoadGroups, StoreGroups;

  for (auto &G : AddrGroups) {
    llvm::append_range(LoadGroups, createLoadGroups(G.second));
    llvm::append_range(StoreGroups, createStoreGroups(G.second));
  }

  for (auto &M : LoadGroups)
    Changed |= move(M);
  for (auto &M : StoreGroups)
    Changed |= move(M);

  for (auto &M : LoadGroups)
    Changed |= realignGroup(M);
  for (auto &M : StoreGroups)
    Changed |= realignGroup(M);

  return Changed;
}

// --- End AlignVectors

auto HexagonVectorCombine::run() -> bool {
  if (!HST.useHVXOps())
    return false;

  bool Changed = AlignVectors(*this).run();
  return Changed;
}

auto HexagonVectorCombine::getIntTy() const -> IntegerType * {
  return Type::getInt32Ty(F.getContext());
}

auto HexagonVectorCombine::getByteTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *ByteTy = Type::getInt8Ty(F.getContext());
  if (ElemCount == 0)
    return ByteTy;
  return VectorType::get(ByteTy, ElemCount, /*Scalable*/ false);
}

auto HexagonVectorCombine::getBoolTy(int ElemCount) const -> Type * {
  assert(ElemCount >= 0);
  IntegerType *BoolTy = Type::getInt1Ty(F.getContext());
  if (ElemCount == 0)
    return BoolTy;
  return VectorType::get(BoolTy, ElemCount, /*Scalable*/ false);
}

auto HexagonVectorCombine::getConstInt(int Val) const -> ConstantInt * {
  return ConstantInt::getSigned(getIntTy(), Val);
}

auto HexagonVectorCombine::isZero(const Value *Val) const -> bool {
  if (auto *C = dyn_cast<Constant>(Val))
    return C->isZeroValue();
  return false;
}

auto HexagonVectorCombine::getIntValue(const Value *Val) const
    -> Optional<APInt> {
  if (auto *CI = dyn_cast<ConstantInt>(Val))
    return CI->getValue();
  return None;
}

auto HexagonVectorCombine::isUndef(const Value *Val) const -> bool {
  return isa<UndefValue>(Val);
}

auto HexagonVectorCombine::getSizeOf(const Value *Val) const -> int {
  return getSizeOf(Val->getType());
}

auto HexagonVectorCombine::getSizeOf(const Type *Ty) const -> int {
  return DL.getTypeStoreSize(const_cast<Type *>(Ty)).getFixedValue();
}

auto HexagonVectorCombine::getAllocSizeOf(const Type *Ty) const -> int {
  return DL.getTypeAllocSize(const_cast<Type *>(Ty)).getFixedValue();
}

auto HexagonVectorCombine::getTypeAlignment(Type *Ty) const -> int {
  // The actual type may be shorter than the HVX vector, so determine
  // the alignment based on subtarget info.
  if (HST.isTypeForHVX(Ty))
    return HST.getVectorLength();
  return DL.getABITypeAlign(Ty).value();
}

auto HexagonVectorCombine::getNullValue(Type *Ty) const -> Constant * {
  assert(Ty->isIntOrIntVectorTy());
  auto Zero = ConstantInt::get(Ty->getScalarType(), 0);
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return ConstantVector::getSplat(VecTy->getElementCount(), Zero);
  return Zero;
}

auto HexagonVectorCombine::getFullValue(Type *Ty) const -> Constant * {
  assert(Ty->isIntOrIntVectorTy());
  auto Minus1 = ConstantInt::get(Ty->getScalarType(), -1);
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return ConstantVector::getSplat(VecTy->getElementCount(), Minus1);
  return Minus1;
}

// Insert bytes [Start..Start+Length) of Src into Dst at byte Where.
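// For example, insertb(Dst, Src, 1, 2, 5) returns a copy of Dst in which
// bytes 5 and 6 have been replaced by bytes 1 and 2 of Src.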
auto HexagonVectorCombine::insertb(IRBuilder<> &Builder, Value *Dst, Value *Src,
                                   int Start, int Length, int Where) const
    -> Value * {
  assert(isByteVecTy(Dst->getType()) && isByteVecTy(Src->getType()));
  int SrcLen = getSizeOf(Src);
  int DstLen = getSizeOf(Dst);
  assert(0 <= Start && Start + Length <= SrcLen);
  assert(0 <= Where && Where + Length <= DstLen);

  int P2Len = PowerOf2Ceil(SrcLen | DstLen);
  auto *Undef = UndefValue::get(getByteTy());
  Value *P2Src = vresize(Builder, Src, P2Len, Undef);
  Value *P2Dst = vresize(Builder, Dst, P2Len, Undef);

  SmallVector<int, 256> SMask(P2Len);
  for (int i = 0; i != P2Len; ++i) {
    // If i is in [Where, Where+Length), pick Src[Start+(i-Where)].
    // Otherwise, pick Dst[i].
    SMask[i] =
        (Where <= i && i < Where + Length) ? P2Len + Start + (i - Where) : i;
  }

  Value *P2Insert = Builder.CreateShuffleVector(P2Dst, P2Src, SMask);
  return vresize(Builder, P2Insert, DstLen, Undef);
}

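// vlalignb shifts the double vector Lo:Hi left by Amt bytes and returns the
// upper half; vralignb (below) shifts it right by Amt bytes and returns the
// lower half. For example, for 4-byte vectors Lo = <a,b,c,d> and
// Hi = <e,f,g,h>:
//   vlalignb(Lo, Hi, 1) == <d,e,f,g>
//   vralignb(Lo, Hi, 1) == <b,c,d,e>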
auto HexagonVectorCombine::vlalignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
                                    Value *Amt) const -> Value * {
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  assert(isSectorTy(Hi->getType()));
  if (isZero(Amt))
    return Hi;
  int VecLen = getSizeOf(Hi);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, VecLen - IntAmt->getSExtValue(),
                           VecLen);

  if (HST.isTypeForHVX(Hi->getType())) {
    int HwLen = HST.getVectorLength();
    assert(VecLen == HwLen && "Expecting an exact HVX type");
    Intrinsic::ID V6_vlalignb = HwLen == 64
                                    ? Intrinsic::hexagon_V6_vlalignb
                                    : Intrinsic::hexagon_V6_vlalignb_128B;
    return createHvxIntrinsic(Builder, V6_vlalignb, Hi->getType(),
                              {Hi, Lo, Amt});
  }

  if (VecLen == 4) {
    Value *Pair = concat(Builder, {Lo, Hi});
    Value *Shift = Builder.CreateLShr(Builder.CreateShl(Pair, Amt), 32);
    Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
    return Builder.CreateBitCast(Trunc, Hi->getType());
  }
  if (VecLen == 8) {
    Value *Sub = Builder.CreateSub(getConstInt(VecLen), Amt);
    return vralignb(Builder, Lo, Hi, Sub);
  }
  llvm_unreachable("Unexpected vector length");
}

auto HexagonVectorCombine::vralignb(IRBuilder<> &Builder, Value *Lo, Value *Hi,
                                    Value *Amt) const -> Value * {
  assert(Lo->getType() == Hi->getType() && "Argument type mismatch");
  assert(isSectorTy(Lo->getType()));
  if (isZero(Amt))
    return Lo;
  int VecLen = getSizeOf(Lo);
  if (auto IntAmt = getIntValue(Amt))
    return getElementRange(Builder, Lo, Hi, IntAmt->getSExtValue(), VecLen);

  if (HST.isTypeForHVX(Lo->getType())) {
    int HwLen = HST.getVectorLength();
    assert(VecLen == HwLen && "Expecting an exact HVX type");
    Intrinsic::ID V6_valignb = HwLen == 64 ? Intrinsic::hexagon_V6_valignb
                                           : Intrinsic::hexagon_V6_valignb_128B;
    return createHvxIntrinsic(Builder, V6_valignb, Lo->getType(),
                              {Hi, Lo, Amt});
  }

  if (VecLen == 4) {
    Value *Pair = concat(Builder, {Lo, Hi});
    Value *Shift = Builder.CreateLShr(Pair, Amt);
    Value *Trunc = Builder.CreateTrunc(Shift, Type::getInt32Ty(F.getContext()));
    return Builder.CreateBitCast(Trunc, Lo->getType());
  }
  if (VecLen == 8) {
    Type *Int64Ty = Type::getInt64Ty(F.getContext());
    Value *Lo64 = Builder.CreateBitCast(Lo, Int64Ty);
    Value *Hi64 = Builder.CreateBitCast(Hi, Int64Ty);
    Function *FI = Intrinsic::getDeclaration(F.getParent(),
                                             Intrinsic::hexagon_S2_valignrb);
    Value *Call = Builder.CreateCall(FI, {Hi64, Lo64, Amt});
    return Builder.CreateBitCast(Call, Lo->getType());
  }
  llvm_unreachable("Unexpected vector length");
}

// Concatenates a sequence of vectors of the same type.
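// For example, concat of four <4 x i32> values yields one <16 x i32> value;
// the vectors are joined pairwise, so n inputs take about log2(n) rounds of
// shufflevector instructions.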
auto HexagonVectorCombine::concat(IRBuilder<> &Builder,
                                  ArrayRef<Value *> Vecs) const -> Value * {
  assert(!Vecs.empty());
  SmallVector<int, 256> SMask;
  std::vector<Value *> Work[2];
  int ThisW = 0, OtherW = 1;

  Work[ThisW].assign(Vecs.begin(), Vecs.end());
  while (Work[ThisW].size() > 1) {
    auto *Ty = cast<VectorType>(Work[ThisW].front()->getType());
    int ElemCount = Ty->getElementCount().getFixedValue();
    SMask.resize(ElemCount * 2);
    std::iota(SMask.begin(), SMask.end(), 0);

    Work[OtherW].clear();
    if (Work[ThisW].size() % 2 != 0)
      Work[ThisW].push_back(UndefValue::get(Ty));
    for (int i = 0, e = Work[ThisW].size(); i < e; i += 2) {
      Value *Joined = Builder.CreateShuffleVector(Work[ThisW][i],
                                                  Work[ThisW][i + 1], SMask);
      Work[OtherW].push_back(Joined);
    }
    std::swap(ThisW, OtherW);
  }

  // Since there may have been some undefs appended to make shuffle operands
  // have the same type, perform the last shuffle to only pick the original
  // elements.
  SMask.resize(Vecs.size() * getSizeOf(Vecs.front()->getType()));
  std::iota(SMask.begin(), SMask.end(), 0);
  // After the final swap, Work[ThisW] holds the single joined vector.
  Value *Total = Work[ThisW].front();
  return Builder.CreateShuffleVector(Total, SMask);
}

auto HexagonVectorCombine::vresize(IRBuilder<> &Builder, Value *Val,
                                   int NewSize, Value *Pad) const -> Value * {
  assert(isa<VectorType>(Val->getType()));
  auto *ValTy = cast<VectorType>(Val->getType());
  assert(ValTy->getElementType() == Pad->getType());

  int CurSize = ValTy->getElementCount().getFixedValue();
  if (CurSize == NewSize)
    return Val;
  // Truncate?
  if (CurSize > NewSize)
    return getElementRange(Builder, Val, /*Unused*/ Val, 0, NewSize);
  // Extend.
  SmallVector<int, 128> SMask(NewSize);
  std::iota(SMask.begin(), SMask.begin() + CurSize, 0);
  std::fill(SMask.begin() + CurSize, SMask.end(), CurSize);
  Value *PadVec = Builder.CreateVectorSplat(CurSize, Pad);
  return Builder.CreateShuffleVector(Val, PadVec, SMask);
}

auto HexagonVectorCombine::rescale(IRBuilder<> &Builder, Value *Mask,
                                   Type *FromTy, Type *ToTy) const -> Value * {
  // Mask is a vector <N x i1>, where each element corresponds to an
  // element of FromTy. Remap it so that each element will correspond
  // to an element of ToTy.
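  // For example, a <4 x i1> mask for FromTy with i32 elements, rescaled to
  // ToTy with i8 elements, becomes a <16 x i1> mask in which each original
  // bit appears 4 times.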
  assert(isa<VectorType>(Mask->getType()));

  Type *FromSTy = FromTy->getScalarType();
  Type *ToSTy = ToTy->getScalarType();
  if (FromSTy == ToSTy)
    return Mask;

  int FromSize = getSizeOf(FromSTy);
  int ToSize = getSizeOf(ToSTy);
  assert(FromSize % ToSize == 0 || ToSize % FromSize == 0);

  auto *MaskTy = cast<VectorType>(Mask->getType());
  int FromCount = MaskTy->getElementCount().getFixedValue();
  int ToCount = (FromCount * FromSize) / ToSize;
  assert((FromCount * FromSize) % ToSize == 0);

  // Mask <N x i1> -> sext to <N x FromTy> -> bitcast to <M x ToTy> ->
  // -> trunc to <M x i1>.
  Value *Ext = Builder.CreateSExt(
      Mask, VectorType::get(FromSTy, FromCount, /*Scalable*/ false));
  Value *Cast = Builder.CreateBitCast(
      Ext, VectorType::get(ToSTy, ToCount, /*Scalable*/ false));
  return Builder.CreateTrunc(
      Cast, VectorType::get(getBoolTy(), ToCount, /*Scalable*/ false));
}

// Bitcast to bytes, and return least significant bits.
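// For example, <4 x i32> -> <16 x i8> -> <16 x i1> (one bit per byte).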
auto HexagonVectorCombine::vlsb(IRBuilder<> &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getBoolTy())
    return Val;

  Value *Bytes = vbytes(Builder, Val);
  if (auto *VecTy = dyn_cast<VectorType>(Bytes->getType()))
    return Builder.CreateTrunc(Bytes, getBoolTy(getSizeOf(VecTy)));
  // If Bytes is a scalar (i.e. Val was a scalar byte), return i1, not
  // <1 x i1>.
  return Builder.CreateTrunc(Bytes, getBoolTy());
}

// Bitcast to bytes for non-bool. For bool, convert i1 -> i8.
auto HexagonVectorCombine::vbytes(IRBuilder<> &Builder, Value *Val) const
    -> Value * {
  Type *ScalarTy = Val->getType()->getScalarType();
  if (ScalarTy == getByteTy())
    return Val;

  if (ScalarTy != getBoolTy())
    return Builder.CreateBitCast(Val, getByteTy(getSizeOf(Val)));
  // For bool, return a sext from i1 to i8.
  if (auto *VecTy = dyn_cast<VectorType>(Val->getType()))
    return Builder.CreateSExt(Val, VectorType::get(getByteTy(), VecTy));
  return Builder.CreateSExt(Val, getByteTy());
}

auto HexagonVectorCombine::createHvxIntrinsic(IRBuilder<> &Builder,
                                              Intrinsic::ID IntID, Type *RetTy,
                                              ArrayRef<Value *> Args) const
    -> Value * {
  int HwLen = HST.getVectorLength();
  Type *BoolTy = Type::getInt1Ty(F.getContext());
  Type *Int32Ty = Type::getInt32Ty(F.getContext());
  // HVX vector -> v16i32/v32i32
  // HVX vector predicate -> v512i1/v1024i1
  auto getTypeForIntrin = [&](Type *Ty) -> Type * {
    if (HST.isTypeForHVX(Ty, /*IncludeBool*/ true)) {
      Type *ElemTy = cast<VectorType>(Ty)->getElementType();
      if (ElemTy == Int32Ty)
        return Ty;
      if (ElemTy == BoolTy)
        return VectorType::get(BoolTy, 8 * HwLen, /*Scalable*/ false);
      return VectorType::get(Int32Ty, HwLen / 4, /*Scalable*/ false);
    }
    // Non-HVX type. It should be a scalar.
    assert(Ty == Int32Ty || Ty->isIntegerTy(64));
    return Ty;
  };

  auto getCast = [&](IRBuilder<> &Builder, Value *Val,
                     Type *DestTy) -> Value * {
    Type *SrcTy = Val->getType();
    if (SrcTy == DestTy)
      return Val;
    if (HST.isTypeForHVX(SrcTy, /*IncludeBool*/ true)) {
      if (cast<VectorType>(SrcTy)->getElementType() == BoolTy) {
        // This should take care of casts the other way too, for example
        // v1024i1 -> v32i1.
        Intrinsic::ID TC = HwLen == 64
                               ? Intrinsic::hexagon_V6_pred_typecast
                               : Intrinsic::hexagon_V6_pred_typecast_128B;
        Function *FI = Intrinsic::getDeclaration(F.getParent(), TC,
                                                 {DestTy, Val->getType()});
        return Builder.CreateCall(FI, {Val});
      }
      // Non-predicate HVX vector.
      return Builder.CreateBitCast(Val, DestTy);
    }
    // Non-HVX type. It should be a scalar, and it should already have
    // a valid type.
    llvm_unreachable("Unexpected type");
  };

  SmallVector<Value *, 4> IntOps;
  for (Value *A : Args)
    IntOps.push_back(getCast(Builder, A, getTypeForIntrin(A->getType())));
  Function *FI = Intrinsic::getDeclaration(F.getParent(), IntID);
  Value *Call = Builder.CreateCall(FI, IntOps);

  Type *CallTy = Call->getType();
  if (CallTy == RetTy)
    return Call;
  // Scalar types should have RetTy matching the call return type.
  assert(HST.isTypeForHVX(CallTy, /*IncludeBool*/ true));
  if (cast<VectorType>(CallTy)->getElementType() == BoolTy)
    return getCast(Builder, Call, RetTy);
  return Builder.CreateBitCast(Call, RetTy);
}

auto HexagonVectorCombine::calculatePointerDifference(Value *Ptr0,
                                                      Value *Ptr1) const
    -> Optional<int> {
  struct Builder : IRBuilder<> {
    Builder(BasicBlock *B) : IRBuilder<>(B) {}
    ~Builder() {
      for (Instruction *I : llvm::reverse(ToErase))
        I->eraseFromParent();
    }
    SmallVector<Instruction *, 8> ToErase;
  };

#define CallBuilder(B, F)                                                      \
  [&](auto &B_) {                                                              \
    Value *V = B_.F;                                                           \
    if (auto *I = dyn_cast<Instruction>(V))                                    \
      B_.ToErase.push_back(I);                                                 \
    return V;                                                                  \
  }(B)

  auto Simplify = [&](Value *V) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      SimplifyQuery Q(DL, &TLI, &DT, &AC, I);
      if (Value *S = SimplifyInstruction(I, Q))
        return S;
    }
    return V;
  };

  auto StripBitCast = [](Value *V) {
    while (auto *C = dyn_cast<BitCastInst>(V))
      V = C->getOperand(0);
    return V;
  };

  Ptr0 = StripBitCast(Ptr0);
  Ptr1 = StripBitCast(Ptr1);
  if (!isa<GetElementPtrInst>(Ptr0) || !isa<GetElementPtrInst>(Ptr1))
    return None;

  auto *Gep0 = cast<GetElementPtrInst>(Ptr0);
  auto *Gep1 = cast<GetElementPtrInst>(Ptr1);
  if (Gep0->getPointerOperand() != Gep1->getPointerOperand())
    return None;

  Builder B(Gep0->getParent());
  int Scale = getAllocSizeOf(Gep0->getSourceElementType());

  // FIXME: for now only check GEPs with a single index.
  if (Gep0->getNumOperands() != 2 || Gep1->getNumOperands() != 2)
    return None;

  Value *Idx0 = Gep0->getOperand(1);
  Value *Idx1 = Gep1->getOperand(1);

  // First, try to simplify the subtraction directly.
  if (auto *Diff = dyn_cast<ConstantInt>(
          Simplify(CallBuilder(B, CreateSub(Idx0, Idx1)))))
    return Diff->getSExtValue() * Scale;

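  // The direct subtraction did not fold. Since Unknown and ~Unknown
  // partition the bits, Idx = (Idx & Unknown) + (Idx & ~Unknown), so
  // Idx0 - Idx1 is the sum of the differences of the unknown-bit parts
  // and the known-bit parts; try to fold each part separately.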
  KnownBits Known0 = computeKnownBits(Idx0, DL, 0, &AC, Gep0, &DT);
  KnownBits Known1 = computeKnownBits(Idx1, DL, 0, &AC, Gep1, &DT);
  APInt Unknown = ~(Known0.Zero | Known0.One) | ~(Known1.Zero | Known1.One);
  if (Unknown.isAllOnesValue())
    return None;

  Value *MaskU = ConstantInt::get(Idx0->getType(), Unknown);
  Value *AndU0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskU)));
  Value *AndU1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskU)));
  Value *SubU = Simplify(CallBuilder(B, CreateSub(AndU0, AndU1)));
  int Diff0 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubU)) {
    Diff0 = C->getSExtValue();
  } else {
    return None;
  }

  Value *MaskK = ConstantInt::get(MaskU->getType(), ~Unknown);
  Value *AndK0 = Simplify(CallBuilder(B, CreateAnd(Idx0, MaskK)));
  Value *AndK1 = Simplify(CallBuilder(B, CreateAnd(Idx1, MaskK)));
  Value *SubK = Simplify(CallBuilder(B, CreateSub(AndK0, AndK1)));
  int Diff1 = 0;
  if (auto *C = dyn_cast<ConstantInt>(SubK)) {
    Diff1 = C->getSExtValue();
  } else {
    return None;
  }

  return (Diff0 + Diff1) * Scale;

#undef CallBuilder
}

template <typename T>
auto HexagonVectorCombine::isSafeToMoveBeforeInBB(const Instruction &In,
                                                  BasicBlock::const_iterator To,
                                                  const T &Ignore) const
    -> bool {
  auto getLocOrNone = [this](const Instruction &I) -> Optional<MemoryLocation> {
    if (const auto *II = dyn_cast<IntrinsicInst>(&I)) {
      switch (II->getIntrinsicID()) {
      case Intrinsic::masked_load:
        return MemoryLocation::getForArgument(II, 0, TLI);
      case Intrinsic::masked_store:
        return MemoryLocation::getForArgument(II, 1, TLI);
      }
    }
    return MemoryLocation::getOrNone(&I);
  };

  // The source and the destination must be in the same basic block.
  const BasicBlock &Block = *In.getParent();
  assert(Block.begin() == To || Block.end() == To || To->getParent() == &Block);
  // No PHIs.
  if (isa<PHINode>(In) || (To != Block.end() && isa<PHINode>(*To)))
    return false;

  if (!mayBeMemoryDependent(In))
    return true;
  bool MayWrite = In.mayWriteToMemory();
  auto MaybeLoc = getLocOrNone(In);

  auto From = In.getIterator();
  if (From == To)
    return true;
  bool MoveUp = (To != Block.end() && To->comesBefore(&In));
  auto Range =
      MoveUp ? std::make_pair(To, From) : std::make_pair(std::next(From), To);
  for (auto It = Range.first; It != Range.second; ++It) {
    const Instruction &I = *It;
    if (llvm::is_contained(Ignore, &I))
      continue;
    // The assume intrinsic can be ignored.
    if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
      if (II->getIntrinsicID() == Intrinsic::assume)
        continue;
    }
    // Parts based on isSafeToMoveBefore from CodeMoverUtils.cpp.
    if (I.mayThrow())
      return false;
    if (auto *CB = dyn_cast<CallBase>(&I)) {
      if (!CB->hasFnAttr(Attribute::WillReturn))
        return false;
      if (!CB->hasFnAttr(Attribute::NoSync))
        return false;
    }
    if (I.mayReadOrWriteMemory()) {
      auto MaybeLocI = getLocOrNone(I);
      if (MayWrite || I.mayWriteToMemory()) {
        if (!MaybeLoc || !MaybeLocI)
          return false;
        if (!AA.isNoAlias(*MaybeLoc, *MaybeLocI))
          return false;
      }
    }
  }
  return true;
}

#ifndef NDEBUG
auto HexagonVectorCombine::isByteVecTy(Type *Ty) const -> bool {
  if (auto *VecTy = dyn_cast<VectorType>(Ty))
    return VecTy->getElementType() == getByteTy();
  return false;
}

auto HexagonVectorCombine::isSectorTy(Type *Ty) const -> bool {
  if (!isByteVecTy(Ty))
    return false;
  int Size = getSizeOf(Ty);
  if (HST.isTypeForHVX(Ty))
    return Size == static_cast<int>(HST.getVectorLength());
  return Size == 4 || Size == 8;
}
#endif

auto HexagonVectorCombine::getElementRange(IRBuilder<> &Builder, Value *Lo,
                                           Value *Hi, int Start,
                                           int Length) const -> Value * {
  assert(0 <= Start && Start < Length);
  SmallVector<int, 128> SMask(Length);
  std::iota(SMask.begin(), SMask.end(), Start);
  return Builder.CreateShuffleVector(Lo, Hi, SMask);
}

// Pass management.

namespace llvm {
void initializeHexagonVectorCombineLegacyPass(PassRegistry &);
FunctionPass *createHexagonVectorCombineLegacyPass();
} // namespace llvm

namespace {
class HexagonVectorCombineLegacy : public FunctionPass {
public:
  static char ID;

  HexagonVectorCombineLegacy() : FunctionPass(ID) {}

  StringRef getPassName() const override { return "Hexagon Vector Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<AssumptionCacheTracker>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
    AU.addRequired<TargetPassConfig>();
    FunctionPass::getAnalysisUsage(AU);
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    AssumptionCache &AC =
        getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
    DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    TargetLibraryInfo &TLI =
        getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
    auto &TM = getAnalysis<TargetPassConfig>().getTM<HexagonTargetMachine>();
    HexagonVectorCombine HVC(F, AA, AC, DT, TLI, TM);
    return HVC.run();
  }
};
} // namespace

char HexagonVectorCombineLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(HexagonVectorCombineLegacy, DEBUG_TYPE,
                      "Hexagon Vector Combine", false, false)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(HexagonVectorCombineLegacy, DEBUG_TYPE,
                    "Hexagon Vector Combine", false, false)

FunctionPass *llvm::createHexagonVectorCombineLegacyPass() {
  return new HexagonVectorCombineLegacy();
}
llvm::RISCVFenceField::R
@ R
Definition: RISCVBaseInfo.h:198
KnownBits.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:912
AliasAnalysis.h
llvm::ARMBuildAttrs::Section
@ Section
Legacy Tags.
Definition: ARMBuildAttributes.h:78
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
llvm::Intrinsic::getType
FunctionType * getType(LLVMContext &Context, ID id, ArrayRef< Type * > Tys=None)
Return the function type for an intrinsic.
Definition: Function.cpp:1335
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
TargetMachine.h
isZero
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition: Lint.cpp:519
llvm::AAResults
Definition: AliasAnalysis.h:508
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::RegState::Undef
@ Undef
Value of the register doesn't matter.
Definition: MachineInstrBuilder.h:52
Intrinsics.h
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::ARM_PROC::A
@ A
Definition: ARMBaseInfo.h:34
llvm::KnownBits::One
APInt One
Definition: KnownBits.h:25
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::DomTreeNodeBase::children
iterator_range< iterator > children()
Definition: GenericDomTree.h:83
TargetLibraryInfo.h
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
false
Definition: StackSlotColoring.cpp:142
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::Instruction
Definition: Instruction.h:45
llvm::SimplifyInstruction
Value * SimplifyInstruction(Instruction *I, const SimplifyQuery &Q, OptimizationRemarkEmitter *ORE=nullptr)
See if we can compute a simplified version of this instruction.
Definition: InstructionSimplify.cpp:6293
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::raw_ostream
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:53
llvm::ConstantVector::getSplat
static Constant * getSplat(ElementCount EC, Constant *Elt)
Return a ConstantVector with the specified constant in each element.
Definition: Constants.cpp:1412
llvm::SmallVectorImpl::resize
void resize(size_type N)
Definition: SmallVector.h:606
llvm::operator<<
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:230
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:900
Info
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::MCID::Call
@ Call
Definition: MCInstrDesc.h:153
llvm::None
const NoneType None
Definition: None.h:23
llvm::mayBeMemoryDependent
bool mayBeMemoryDependent(const Instruction &I)
Returns true if the result or effects of the given instructions I depend on or influence global memor...
Definition: ValueTracking.cpp:4696
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:201
llvm::TargetPassConfig
Target-Independent Code Generator Pass Configuration Options.
Definition: TargetPassConfig.h:84
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
G
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:1525
VectorUtils.h
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::cl::values
ValuesClass values(OptsTy... Options)
Helper to build a ValuesClass by forwarding a variable number of arguments as an initializer list to ...
Definition: CommandLine.h:699
llvm::MemoryLocation::getOrNone
static Optional< MemoryLocation > getOrNone(const Instruction *Inst)
Definition: MemoryLocation.cpp:78
llvm::DomTreeNodeBase::getBlock
NodeT * getBlock() const
Definition: GenericDomTree.h:88
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:463
llvm::pdb::Unknown
@ Unknown
Definition: PDBTypes.h:395
D
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const
aarch64 promote const
Definition: AArch64PromoteConstant.cpp:232
Addr
uint64_t Addr
Definition: ELFObjHandler.cpp:80
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
move
compiles ldr LCPI1_0 ldr ldr mov lsr tst moveq r1 ldr LCPI1_1 and r0 bx lr It would be better to do something like to fold the shift into the conditional move
Definition: README.txt:546
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1616
ArrayRef.h
llvm::concat
detail::concat_range< ValueT, RangeTs... > concat(RangeTs &&... Ranges)
Concatenated range across two or more ranges.
Definition: STLExtras.h:1028
TargetPassConfig.h
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
Definition: ValueTracking.cpp:213
llvm::sys::path::const_iterator::begin
friend const_iterator begin(StringRef path, Style style)
Get begin iterator over path.
Definition: Path.cpp:224
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::move
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1609
getLocation
static MemoryLocation getLocation(Instruction *I, AAResults *AA)
Definition: SLPVectorizer.cpp:520
llvm::MemoryLocation::getForArgument
static MemoryLocation getForArgument(const CallBase *Call, unsigned ArgIdx, const TargetLibraryInfo *TLI)
Return a location representing a particular argument of a call.
Definition: MemoryLocation.cpp:131
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::AMDGPU::IsaInfo::TargetIDSetting::Off
@ Off
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::size
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1532
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::any_of
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1558
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
Simplify
assume Assume Simplify
Definition: AssumeBundleBuilder.cpp:604
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::append_range
void append_range(Container &C, Range &&R)
Wrapper function to append a range to a container.
Definition: STLExtras.h:1748
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
DEBUG_TYPE
#define DEBUG_TYPE
Definition: HexagonVectorCombine.cpp:50
S
add sub stmia L5 ldr r0 bl L_printf $stub Instead of a and a wouldn t it be better to do three moves *Return an aggregate type is even return S
Definition: README.txt:210
llvm::DomTreeNodeBase< BasicBlock >
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
CallBuilder
#define CallBuilder(B, F)
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
j
return j(j<< 16)
llvm::Type::getInt64Ty
static IntegerType * getInt64Ty(LLVMContext &C)
Definition: Type.cpp:204
llvm::ConstantInt::getTrue
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:848
std
Definition: BitVector.h:838
llvm::KnownBits
Definition: KnownBits.h:23
llvm::copy_if
OutputIt copy_if(R &&Range, OutputIt Out, UnaryPredicate P)
Provide wrappers to std::copy_if which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1597
H
#define H(x, y, z)
Definition: MD5.cpp:58
llvm::SmallVectorImpl::assign
void assign(size_type NumElts, ValueParamT Elt)
Definition: SmallVector.h:669
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:207
llvm::ConstantInt::getSigned
static ConstantInt * getSigned(IntegerType *Ty, int64_t V)
Return a ConstantInt with the specified value for the specified type.
Definition: Constants.cpp:914
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:738
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:219
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:410
llvm::pdb::DbgHeaderType::Max
@ Max
llvm::TargetMachine::getSubtargetImpl
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Definition: TargetMachine.h:135
SmallVector.h
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1336
InstructionSimplify.h
llvm::max
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
shift
http eax xorl edx cl sete al setne dl sall eax sall edx But that requires good bit subreg support this might be better It s an extra shift
Definition: README.txt:30
llvm::HexagonSubtarget
Definition: HexagonSubtarget.h:43
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:93
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
From
BlockVerifier::State From
Definition: BlockVerifier.cpp:55
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
raw_ostream.h
llvm::pdb::PDB_SymType::Block
@ Block
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:632
llvm::BasicBlock::const_iterator
InstListType::const_iterator const_iterator
Definition: BasicBlock.h:91
llvm::sampleprof::Base
@ Base
Definition: Discriminator.h:58
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37