1 //===- LoadStoreVectorizer.cpp - GPU Load & Store Vectorizer --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges loads/stores to/from sequential memory addresses into vector
10 // loads/stores. Although there's nothing GPU-specific in here, this pass is
11 // motivated by the microarchitectural quirks of nVidia and AMD GPUs.
12 //
13 // (For simplicity below we talk about loads only, but everything also applies
14 // to stores.)
15 //
16 // This pass is intended to be run late in the pipeline, after other
17 // vectorization opportunities have been exploited. So the assumption here is
18 // that immediately following our new vector load we'll need to extract out the
19 // individual elements of the load, so we can operate on them individually.
20 //
21 // On CPUs this transformation is usually not beneficial, because extracting the
22 // elements of a vector register is expensive on most architectures. It's
23 // usually better just to load each element individually into its own scalar
24 // register.
25 //
26 // However, nVidia and AMD GPUs don't have proper vector registers. Instead, a
27 // "vector load" loads directly into a series of scalar registers. In effect,
28 // extracting the elements of the vector is free. It's therefore always
29 // beneficial to vectorize a sequence of loads on these architectures.
30 //
31 // Vectorizing (perhaps a better name might be "coalescing") loads can have
32 // large performance impacts on GPU kernels, and opportunities for vectorizing
33 // are common in GPU code. This pass tries very hard to find such
34 // opportunities; its runtime is quadratic in the number of loads in a BB.
35 //
36 // Some CPU architectures, such as ARM, have instructions that load into
37 // multiple scalar registers, similar to a GPU vectorized load. In theory ARM
38 // could use this pass (with some modifications), but currently it implements
39 // its own pass to do something similar to what we do here.
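//
// For illustration, a rough sketch of the rewrite this pass aims for. The IR
// below is a hypothetical example, not taken from a real kernel:
//
//   %l0 = load float, float* %p, align 16
//   %p1 = getelementptr float, float* %p, i64 1
//   %l1 = load float, float* %p1, align 4
//
// becomes, assuming the target allows an 8-byte vector access here,
//
//   %vp = bitcast float* %p to <2 x float>*
//   %v  = load <2 x float>, <2 x float>* %vp, align 16
//   %l0 = extractelement <2 x float> %v, i32 0
//   %l1 = extractelement <2 x float> %v, i32 1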
40 
41 #include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
42 #include "llvm/ADT/APInt.h"
43 #include "llvm/ADT/ArrayRef.h"
44 #include "llvm/ADT/MapVector.h"
45 #include "llvm/ADT/PostOrderIterator.h"
46 #include "llvm/ADT/STLExtras.h"
47 #include "llvm/ADT/SmallPtrSet.h"
48 #include "llvm/ADT/SmallVector.h"
49 #include "llvm/ADT/Statistic.h"
50 #include "llvm/ADT/iterator_range.h"
51 #include "llvm/Analysis/AliasAnalysis.h"
52 #include "llvm/Analysis/AssumptionCache.h"
53 #include "llvm/Analysis/MemoryLocation.h"
54 #include "llvm/Analysis/ScalarEvolution.h"
55 #include "llvm/Analysis/TargetTransformInfo.h"
56 #include "llvm/Analysis/ValueTracking.h"
57 #include "llvm/Analysis/VectorUtils.h"
58 #include "llvm/IR/Attributes.h"
59 #include "llvm/IR/BasicBlock.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DerivedTypes.h"
63 #include "llvm/IR/Dominators.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/IRBuilder.h"
66 #include "llvm/IR/InstrTypes.h"
67 #include "llvm/IR/Instruction.h"
68 #include "llvm/IR/Instructions.h"
69 #include "llvm/IR/IntrinsicInst.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/User.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/InitializePasses.h"
75 #include "llvm/Pass.h"
76 #include "llvm/Support/Casting.h"
77 #include "llvm/Support/Debug.h"
78 #include "llvm/Support/KnownBits.h"
79 #include "llvm/Support/MathExtras.h"
80 #include "llvm/Support/raw_ostream.h"
81 #include "llvm/Transforms/Utils/Local.h"
82 #include "llvm/Transforms/Vectorize.h"
83 #include <algorithm>
84 #include <cassert>
85 #include <cstdlib>
86 #include <tuple>
87 #include <utility>
88 
89 using namespace llvm;
90 
91 #define DEBUG_TYPE "load-store-vectorizer"
92 
93 STATISTIC(NumVectorInstructions, "Number of vector accesses generated");
94 STATISTIC(NumScalarsVectorized, "Number of scalar accesses vectorized");
95 
96 // FIXME: Assuming stack alignment of 4 is always good enough
97 static const unsigned StackAdjustedAlignment = 4;
98 
99 namespace {
100 
101 /// ChainID is an arbitrary token that is allowed to be different only for the
102 /// accesses that are guaranteed to be considered non-consecutive by
103 /// Vectorizer::isConsecutiveAccess. It's used to group instructions and so
104 /// reduce the number of instructions the main search operates on at a time;
105 /// i.e. it exists purely to reduce compile time, since the main search has
106 /// O(n^2) time complexity. The underlying type of ChainID should not be
107 /// relied upon.
108 using ChainID = const Value *;
109 using InstrList = SmallVector<Instruction *, 8>;
110 using InstrListMap = MapVector<ChainID, InstrList>;
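// For illustration only, a rough sketch of how ChainID groups accesses (the IR
// names below are hypothetical): pointers that share the same underlying
// object get the same ChainID, so only their accesses are compared pairwise by
// the quadratic search.
//
//   %p0 = getelementptr float, float* %base, i64 0   ; ChainID is %base
//   %p1 = getelementptr float, float* %base, i64 1   ; ChainID is %base
//   %q0 = getelementptr float, float* %other, i64 4  ; ChainID is %other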
111 
112 class Vectorizer {
113  Function &F;
114  AliasAnalysis &AA;
115  AssumptionCache &AC;
116  DominatorTree &DT;
117  ScalarEvolution &SE;
118  TargetTransformInfo &TTI;
119  const DataLayout &DL;
120  IRBuilder<> Builder;
121 
122 public:
123  Vectorizer(Function &F, AliasAnalysis &AA, AssumptionCache &AC,
124  DominatorTree &DT, ScalarEvolution &SE, TargetTransformInfo &TTI)
125  : F(F), AA(AA), AC(AC), DT(DT), SE(SE), TTI(TTI),
126  DL(F.getParent()->getDataLayout()), Builder(SE.getContext()) {}
127 
128  bool run();
129 
130 private:
131  unsigned getPointerAddressSpace(Value *I);
132 
133  static const unsigned MaxDepth = 3;
134 
135  bool isConsecutiveAccess(Value *A, Value *B);
136  bool areConsecutivePointers(Value *PtrA, Value *PtrB, APInt PtrDelta,
137  unsigned Depth = 0) const;
138  bool lookThroughComplexAddresses(Value *PtrA, Value *PtrB, APInt PtrDelta,
139  unsigned Depth) const;
140  bool lookThroughSelects(Value *PtrA, Value *PtrB, const APInt &PtrDelta,
141  unsigned Depth) const;
142 
143  /// After vectorization, reorder the instructions that I depends on
144  /// (the instructions defining its operands), to ensure they dominate I.
145  void reorder(Instruction *I);
146 
147  /// Returns the first and the last instructions in Chain.
148  std::pair<BasicBlock::iterator, BasicBlock::iterator>
149  getBoundaryInstrs(ArrayRef<Instruction *> Chain);
150 
151  /// Erases the original instructions after vectorizing.
152  void eraseInstructions(ArrayRef<Instruction *> Chain);
153 
154  /// "Legalize" the vector type that would be produced by combining \p
155  /// ElementSizeBits elements in \p Chain. Break into two pieces such that the
156  /// total size of each piece is 1, 2 or a multiple of 4 bytes. \p Chain is
157  /// expected to have more than 4 elements.
158  std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
159  splitOddVectorElts(ArrayRef<Instruction *> Chain, unsigned ElementSizeBits);
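// A rough worked example of the split above (the sizes are hypothetical):
// a chain of seven 16-bit accesses covers 14 bytes, so the first piece keeps
// (14 - 14 % 4) / 2 = 6 elements (12 bytes) and the second piece gets the
// remaining element.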
160 
161  /// Finds the largest prefix of Chain that's vectorizable, checking for
162  /// intervening instructions which may affect the memory accessed by the
163  /// instructions within Chain.
164  ///
165  /// The elements of \p Chain must be all loads or all stores and must be in
166  /// address order.
167  ArrayRef<Instruction *> getVectorizablePrefix(ArrayRef<Instruction *> Chain);
168 
169  /// Collects load and store instructions to vectorize.
170  std::pair<InstrListMap, InstrListMap> collectInstructions(BasicBlock *BB);
171 
172  /// Processes the collected instructions in \p Map. The values of \p Map
173  /// should be all loads or all stores.
174  bool vectorizeChains(InstrListMap &Map);
175 
176  /// Finds loads/stores to consecutive memory addresses and vectorizes them.
177  bool vectorizeInstructions(ArrayRef<Instruction *> Instrs);
178 
179  /// Vectorizes the load instructions in Chain.
180  bool
181  vectorizeLoadChain(ArrayRef<Instruction *> Chain,
182  SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
183 
184  /// Vectorizes the store instructions in Chain.
185  bool
186  vectorizeStoreChain(ArrayRef<Instruction *> Chain,
187  SmallPtrSet<Instruction *, 16> *InstructionsProcessed);
188 
189  /// Check whether this load/store access is misaligned.
190  bool accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
191  Align Alignment);
192 };
193 
194 class LoadStoreVectorizerLegacyPass : public FunctionPass {
195 public:
196  static char ID;
197 
198  LoadStoreVectorizerLegacyPass() : FunctionPass(ID) {
199  initializeLoadStoreVectorizerLegacyPassPass(*PassRegistry::getPassRegistry());
200  }
201 
202  bool runOnFunction(Function &F) override;
203 
204  StringRef getPassName() const override {
205  return "GPU Load and Store Vectorizer";
206  }
207 
208  void getAnalysisUsage(AnalysisUsage &AU) const override {
209  AU.addRequired<AAResultsWrapperPass>();
210  AU.addRequired<AssumptionCacheTracker>();
211  AU.addRequired<ScalarEvolutionWrapperPass>();
212  AU.addRequired<DominatorTreeWrapperPass>();
213  AU.addRequired<TargetTransformInfoWrapperPass>();
214  AU.setPreservesCFG();
215  }
216 };
217 
218 } // end anonymous namespace
219 
220 char LoadStoreVectorizerLegacyPass::ID = 0;
221 
222 INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
223  "Vectorize load and Store instructions", false, false)
230 INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE,
231  "Vectorize load and store instructions", false, false)
232 
233 Pass *llvm::createLoadStoreVectorizerPass() {
234  return new LoadStoreVectorizerLegacyPass();
235 }
236 
237 bool LoadStoreVectorizerLegacyPass::runOnFunction(Function &F) {
238  // Don't vectorize when the attribute NoImplicitFloat is used.
239  if (skipFunction(F) || F.hasFnAttribute(Attribute::NoImplicitFloat))
240  return false;
241 
242  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
243  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
244  ScalarEvolution &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
245  TargetTransformInfo &TTI =
246  getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
247 
248  AssumptionCache &AC =
249  getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
250 
251  Vectorizer V(F, AA, AC, DT, SE, TTI);
252  return V.run();
253 }
254 
255 PreservedAnalyses LoadStoreVectorizerPass::run(Function &F, FunctionAnalysisManager &AM) {
256  // Don't vectorize when the attribute NoImplicitFloat is used.
257  if (F.hasFnAttribute(Attribute::NoImplicitFloat))
258  return PreservedAnalyses::all();
259 
260  AliasAnalysis &AA = AM.getResult<AAManager>(F);
261  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
262  ScalarEvolution &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
263  AssumptionCache &AC = AM.getResult<AssumptionAnalysis>(F);
264  TargetTransformInfo &TTI = AM.getResult<TargetIRAnalysis>(F);
265 
266  Vectorizer V(F, AA, AC, DT, SE, TTI);
267  bool Changed = V.run();
268  PreservedAnalyses PA;
269  PA.preserveSet<CFGAnalyses>();
270  return Changed ? PA : PreservedAnalyses::all();
271 }
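// For reference, a minimal sketch of scheduling this pass with the new pass
// manager from C++; the FPM/FAM variable names are illustrative and the
// function analyses must already be registered in the FunctionAnalysisManager:
//
//   FunctionPassManager FPM;
//   FPM.addPass(LoadStoreVectorizerPass());
//   FPM.run(F, FAM);
//
// With the legacy pass manager, createLoadStoreVectorizerPass() above plays
// the same role.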
272 
273 // The real propagateMetadata expects a SmallVector<Value*>, but we deal in
274 // vectors of Instructions.
275 static void propagateMetadata(Instruction *I, ArrayRef<Instruction *> IL) {
276  SmallVector<Value *, 8> VL(IL.begin(), IL.end());
277  propagateMetadata(I, VL);
278 }
279 
280 // Vectorizer Implementation
281 bool Vectorizer::run() {
282  bool Changed = false;
283 
284  // Scan the blocks in the function in post order.
285  for (BasicBlock *BB : post_order(&F)) {
286  InstrListMap LoadRefs, StoreRefs;
287  std::tie(LoadRefs, StoreRefs) = collectInstructions(BB);
288  Changed |= vectorizeChains(LoadRefs);
289  Changed |= vectorizeChains(StoreRefs);
290  }
291 
292  return Changed;
293 }
294 
295 unsigned Vectorizer::getPointerAddressSpace(Value *I) {
296  if (LoadInst *L = dyn_cast<LoadInst>(I))
297  return L->getPointerAddressSpace();
298  if (StoreInst *S = dyn_cast<StoreInst>(I))
299  return S->getPointerAddressSpace();
300  return -1;
301 }
302 
303 // FIXME: Merge with llvm::isConsecutiveAccess
304 bool Vectorizer::isConsecutiveAccess(Value *A, Value *B) {
305  Value *PtrA = getLoadStorePointerOperand(A);
306  Value *PtrB = getLoadStorePointerOperand(B);
307  unsigned ASA = getPointerAddressSpace(A);
308  unsigned ASB = getPointerAddressSpace(B);
309 
310  // Check that the address spaces match and that the pointers are valid.
311  if (!PtrA || !PtrB || (ASA != ASB))
312  return false;
313 
314  // Make sure that A and B are different pointers of the same size type.
315  Type *PtrATy = getLoadStoreType(A);
316  Type *PtrBTy = getLoadStoreType(B);
317  if (PtrA == PtrB ||
318  PtrATy->isVectorTy() != PtrBTy->isVectorTy() ||
319  DL.getTypeStoreSize(PtrATy) != DL.getTypeStoreSize(PtrBTy) ||
320  DL.getTypeStoreSize(PtrATy->getScalarType()) !=
321  DL.getTypeStoreSize(PtrBTy->getScalarType()))
322  return false;
323 
324  unsigned PtrBitWidth = DL.getPointerSizeInBits(ASA);
325  APInt Size(PtrBitWidth, DL.getTypeStoreSize(PtrATy));
326 
327  return areConsecutivePointers(PtrA, PtrB, Size);
328 }
329 
330 bool Vectorizer::areConsecutivePointers(Value *PtrA, Value *PtrB,
331  APInt PtrDelta, unsigned Depth) const {
332  unsigned PtrBitWidth = DL.getPointerTypeSizeInBits(PtrA->getType());
333  APInt OffsetA(PtrBitWidth, 0);
334  APInt OffsetB(PtrBitWidth, 0);
335  PtrA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
336  PtrB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
337 
338  unsigned NewPtrBitWidth = DL.getTypeStoreSizeInBits(PtrA->getType());
339 
340  if (NewPtrBitWidth != DL.getTypeStoreSizeInBits(PtrB->getType()))
341  return false;
342 
343  // In case we have to shrink the pointer,
344  // stripAndAccumulateInBoundsConstantOffsets should properly handle a
345  // possible overflow and the value should fit into the smallest data type
346  // used in the cast/gep chain.
347  assert(OffsetA.getMinSignedBits() <= NewPtrBitWidth &&
348  OffsetB.getMinSignedBits() <= NewPtrBitWidth);
349 
350  OffsetA = OffsetA.sextOrTrunc(NewPtrBitWidth);
351  OffsetB = OffsetB.sextOrTrunc(NewPtrBitWidth);
352  PtrDelta = PtrDelta.sextOrTrunc(NewPtrBitWidth);
353 
354  APInt OffsetDelta = OffsetB - OffsetA;
355 
356  // Check if they are based on the same pointer. That makes the offsets
357  // sufficient.
358  if (PtrA == PtrB)
359  return OffsetDelta == PtrDelta;
360 
361  // Compute the base pointer delta needed to make the final delta
362  // equal to the requested pointer delta.
363  APInt BaseDelta = PtrDelta - OffsetDelta;
364 
365  // Compute the distance with SCEV between the base pointers.
366  const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
367  const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
368  const SCEV *C = SE.getConstant(BaseDelta);
369  const SCEV *X = SE.getAddExpr(PtrSCEVA, C);
370  if (X == PtrSCEVB)
371  return true;
372 
373  // The above check will not catch the cases where one of the pointers is
374  // factorized but the other one is not, such as (C + (S * (A + B))) vs
375  // (AS + BS). Get the minus SCEV. That will allow re-combining the expressions
376  // and getting the simplified difference.
377  const SCEV *Dist = SE.getMinusSCEV(PtrSCEVB, PtrSCEVA);
378  if (C == Dist)
379  return true;
380 
381  // Sometimes even this doesn't work, because SCEV can't always see through
382  // patterns that look like (gep (ext (add (shl X, C1), C2))). Try checking
383  // things the hard way.
384  return lookThroughComplexAddresses(PtrA, PtrB, BaseDelta, Depth);
385 }
386 
387 static bool checkNoWrapFlags(Instruction *I, bool Signed) {
388  BinaryOperator *BinOpI = cast<BinaryOperator>(I);
389  return (Signed && BinOpI->hasNoSignedWrap()) ||
390  (!Signed && BinOpI->hasNoUnsignedWrap());
391 }
392 
393 static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA,
394  unsigned MatchingOpIdxA, Instruction *AddOpB,
395  unsigned MatchingOpIdxB, bool Signed) {
396  // If both OpA and OpB are adds with NSW/NUW and with
397  // one of the operands being the same, we can guarantee that the
398  // transformation is safe if we can prove that OpA won't overflow when
399  // IdxDiff is added to the other operand of OpA.
400  // For example:
401  // %tmp7 = add nsw i32 %tmp2, %v0
402  // %tmp8 = sext i32 %tmp7 to i64
403  // ...
404  // %tmp11 = add nsw i32 %v0, 1
405  // %tmp12 = add nsw i32 %tmp2, %tmp11
406  // %tmp13 = sext i32 %tmp12 to i64
407  //
408  // Both %tmp7 and %tmp12 have the nsw flag and share the operand
409  // %tmp2. It's guaranteed that adding 1 to %tmp7 won't overflow
410  // because %tmp11 adds 1 to %v0 and both %tmp11 and %tmp12 have the
411  // nsw flag.
412  assert(AddOpA->getOpcode() == Instruction::Add &&
413  AddOpB->getOpcode() == Instruction::Add &&
414  checkNoWrapFlags(AddOpA, Signed) && checkNoWrapFlags(AddOpB, Signed));
415  if (AddOpA->getOperand(MatchingOpIdxA) ==
416  AddOpB->getOperand(MatchingOpIdxB)) {
417  Value *OtherOperandA = AddOpA->getOperand(MatchingOpIdxA == 1 ? 0 : 1);
418  Value *OtherOperandB = AddOpB->getOperand(MatchingOpIdxB == 1 ? 0 : 1);
419  Instruction *OtherInstrA = dyn_cast<Instruction>(OtherOperandA);
420  Instruction *OtherInstrB = dyn_cast<Instruction>(OtherOperandB);
421  // Match `x +nsw/nuw y` and `x +nsw/nuw (y +nsw/nuw IdxDiff)`.
422  if (OtherInstrB && OtherInstrB->getOpcode() == Instruction::Add &&
423  checkNoWrapFlags(OtherInstrB, Signed) &&
424  isa<ConstantInt>(OtherInstrB->getOperand(1))) {
425  int64_t CstVal =
426  cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
427  if (OtherInstrB->getOperand(0) == OtherOperandA &&
428  IdxDiff.getSExtValue() == CstVal)
429  return true;
430  }
431  // Match `x +nsw/nuw (y +nsw/nuw -IdxDiff)` and `x +nsw/nuw y`.
432  if (OtherInstrA && OtherInstrA->getOpcode() == Instruction::Add &&
433  checkNoWrapFlags(OtherInstrA, Signed) &&
434  isa<ConstantInt>(OtherInstrA->getOperand(1))) {
435  int64_t CstVal =
436  cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
437  if (OtherInstrA->getOperand(0) == OtherOperandB &&
438  IdxDiff.getSExtValue() == -CstVal)
439  return true;
440  }
441  // Match `x +nsw/nuw (y +nsw/nuw c)` and
442  // `x +nsw/nuw (y +nsw/nuw (c + IdxDiff))`.
443  if (OtherInstrA && OtherInstrB &&
444  OtherInstrA->getOpcode() == Instruction::Add &&
445  OtherInstrB->getOpcode() == Instruction::Add &&
446  checkNoWrapFlags(OtherInstrA, Signed) &&
447  checkNoWrapFlags(OtherInstrB, Signed) &&
448  isa<ConstantInt>(OtherInstrA->getOperand(1)) &&
449  isa<ConstantInt>(OtherInstrB->getOperand(1))) {
450  int64_t CstValA =
451  cast<ConstantInt>(OtherInstrA->getOperand(1))->getSExtValue();
452  int64_t CstValB =
453  cast<ConstantInt>(OtherInstrB->getOperand(1))->getSExtValue();
454  if (OtherInstrA->getOperand(0) == OtherInstrB->getOperand(0) &&
455  IdxDiff.getSExtValue() == (CstValB - CstValA))
456  return true;
457  }
458  }
459  return false;
460 }
461 
462 bool Vectorizer::lookThroughComplexAddresses(Value *PtrA, Value *PtrB,
463  APInt PtrDelta,
464  unsigned Depth) const {
465  auto *GEPA = dyn_cast<GetElementPtrInst>(PtrA);
466  auto *GEPB = dyn_cast<GetElementPtrInst>(PtrB);
467  if (!GEPA || !GEPB)
468  return lookThroughSelects(PtrA, PtrB, PtrDelta, Depth);
469 
470  // Look through GEPs after checking they're the same except for the last
471  // index.
472  if (GEPA->getNumOperands() != GEPB->getNumOperands() ||
473  GEPA->getPointerOperand() != GEPB->getPointerOperand())
474  return false;
475  gep_type_iterator GTIA = gep_type_begin(GEPA);
476  gep_type_iterator GTIB = gep_type_begin(GEPB);
477  for (unsigned I = 0, E = GEPA->getNumIndices() - 1; I < E; ++I) {
478  if (GTIA.getOperand() != GTIB.getOperand())
479  return false;
480  ++GTIA;
481  ++GTIB;
482  }
483 
484  Instruction *OpA = dyn_cast<Instruction>(GTIA.getOperand());
485  Instruction *OpB = dyn_cast<Instruction>(GTIB.getOperand());
486  if (!OpA || !OpB || OpA->getOpcode() != OpB->getOpcode() ||
487  OpA->getType() != OpB->getType())
488  return false;
489 
490  if (PtrDelta.isNegative()) {
491  if (PtrDelta.isMinSignedValue())
492  return false;
493  PtrDelta.negate();
494  std::swap(OpA, OpB);
495  }
496  uint64_t Stride = DL.getTypeAllocSize(GTIA.getIndexedType());
497  if (PtrDelta.urem(Stride) != 0)
498  return false;
499  unsigned IdxBitWidth = OpA->getType()->getScalarSizeInBits();
500  APInt IdxDiff = PtrDelta.udiv(Stride).zextOrSelf(IdxBitWidth);
501 
502  // Only look through a ZExt/SExt.
503  if (!isa<SExtInst>(OpA) && !isa<ZExtInst>(OpA))
504  return false;
505 
506  bool Signed = isa<SExtInst>(OpA);
507 
508  // At this point A could be a function parameter, i.e. not an instruction
509  Value *ValA = OpA->getOperand(0);
510  OpB = dyn_cast<Instruction>(OpB->getOperand(0));
511  if (!OpB || ValA->getType() != OpB->getType())
512  return false;
513 
514  // Now we need to prove that adding IdxDiff to ValA won't overflow.
515  bool Safe = false;
516 
517  // First attempt: if OpB is an add with NSW/NUW, and OpB is IdxDiff added to
518  // ValA, we're okay.
519  if (OpB->getOpcode() == Instruction::Add &&
520  isa<ConstantInt>(OpB->getOperand(1)) &&
521  IdxDiff.sle(cast<ConstantInt>(OpB->getOperand(1))->getSExtValue()) &&
522  checkNoWrapFlags(OpB, Signed))
523  Safe = true;
524 
525  // Second attempt: check if we have eligible add NSW/NUW instruction
526  // sequences.
527  OpA = dyn_cast<Instruction>(ValA);
528  if (!Safe && OpA && OpA->getOpcode() == Instruction::Add &&
529  OpB->getOpcode() == Instruction::Add && checkNoWrapFlags(OpA, Signed) &&
530  checkNoWrapFlags(OpB, Signed)) {
531  // In the checks below a matching operand in OpA and OpB is
532  // an operand which is the same in those two instructions.
533  // Below we account for possible orders of the operands of
534  // these add instructions.
535  for (unsigned MatchingOpIdxA : {0, 1})
536  for (unsigned MatchingOpIdxB : {0, 1})
537  if (!Safe)
538  Safe = checkIfSafeAddSequence(IdxDiff, OpA, MatchingOpIdxA, OpB,
539  MatchingOpIdxB, Signed);
540  }
541 
542  unsigned BitWidth = ValA->getType()->getScalarSizeInBits();
543 
544  // Third attempt:
545  // If every bit set in IdxDiff, and every higher-order bit other than the sign
546  // bit, is known to be zero in ValA, we can add IdxDiff to ValA while
547  // guaranteeing no overflow of any sort.
548  if (!Safe) {
549  KnownBits Known(BitWidth);
550  computeKnownBits(ValA, Known, DL, 0, &AC, OpB, &DT);
551  APInt BitsAllowedToBeSet = Known.Zero.zext(IdxDiff.getBitWidth());
552  if (Signed)
553  BitsAllowedToBeSet.clearBit(BitWidth - 1);
554  if (BitsAllowedToBeSet.ult(IdxDiff))
555  return false;
556  }
557 
558  const SCEV *OffsetSCEVA = SE.getSCEV(ValA);
559  const SCEV *OffsetSCEVB = SE.getSCEV(OpB);
560  const SCEV *C = SE.getConstant(IdxDiff.trunc(BitWidth));
561  const SCEV *X = SE.getAddExpr(OffsetSCEVA, C);
562  return X == OffsetSCEVB;
563 }
564 
565 bool Vectorizer::lookThroughSelects(Value *PtrA, Value *PtrB,
566  const APInt &PtrDelta,
567  unsigned Depth) const {
568  if (Depth++ == MaxDepth)
569  return false;
570 
571  if (auto *SelectA = dyn_cast<SelectInst>(PtrA)) {
572  if (auto *SelectB = dyn_cast<SelectInst>(PtrB)) {
573  return SelectA->getCondition() == SelectB->getCondition() &&
574  areConsecutivePointers(SelectA->getTrueValue(),
575  SelectB->getTrueValue(), PtrDelta, Depth) &&
576  areConsecutivePointers(SelectA->getFalseValue(),
577  SelectB->getFalseValue(), PtrDelta, Depth);
578  }
579  }
580  return false;
581 }
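// For illustration, the shape of pattern lookThroughSelects accepts
// (hypothetical IR): two selects on the same condition whose true values are
// consecutive and whose false values are consecutive, e.g.
//
//   %pa = select i1 %c, float* %p0, float* %q0
//   %pb = select i1 %c, float* %p1, float* %q1
//
// If %p1 is %p0 plus PtrDelta bytes and %q1 is %q0 plus PtrDelta bytes, the
// accesses through %pa and %pb are treated as consecutive.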
582 
583 void Vectorizer::reorder(Instruction *I) {
584  SmallPtrSet<Instruction *, 16> InstructionsToMove;
585  SmallVector<Instruction *, 16> Worklist;
586 
587  Worklist.push_back(I);
588  while (!Worklist.empty()) {
589  Instruction *IW = Worklist.pop_back_val();
590  int NumOperands = IW->getNumOperands();
591  for (int i = 0; i < NumOperands; i++) {
592  Instruction *IM = dyn_cast<Instruction>(IW->getOperand(i));
593  if (!IM || IM->getOpcode() == Instruction::PHI)
594  continue;
595 
596  // If IM is in another BB, no need to move it, because this pass only
597  // vectorizes instructions within one BB.
598  if (IM->getParent() != I->getParent())
599  continue;
600 
601  if (!IM->comesBefore(I)) {
602  InstructionsToMove.insert(IM);
603  Worklist.push_back(IM);
604  }
605  }
606  }
607 
608  // All instructions to move should follow I. Start from I, not from begin().
609  for (auto BBI = I->getIterator(), E = I->getParent()->end(); BBI != E;
610  ++BBI) {
611  if (!InstructionsToMove.count(&*BBI))
612  continue;
613  Instruction *IM = &*BBI;
614  --BBI;
615  IM->removeFromParent();
616  IM->insertBefore(I);
617  }
618 }
619 
620 std::pair<BasicBlock::iterator, BasicBlock::iterator>
621 Vectorizer::getBoundaryInstrs(ArrayRef<Instruction *> Chain) {
622  Instruction *C0 = Chain[0];
623  BasicBlock::iterator FirstInstr = C0->getIterator();
624  BasicBlock::iterator LastInstr = C0->getIterator();
625 
626  BasicBlock *BB = C0->getParent();
627  unsigned NumFound = 0;
628  for (Instruction &I : *BB) {
629  if (!is_contained(Chain, &I))
630  continue;
631 
632  ++NumFound;
633  if (NumFound == 1) {
634  FirstInstr = I.getIterator();
635  }
636  if (NumFound == Chain.size()) {
637  LastInstr = I.getIterator();
638  break;
639  }
640  }
641 
642  // Range is [first, last).
643  return std::make_pair(FirstInstr, ++LastInstr);
644 }
645 
646 void Vectorizer::eraseInstructions(ArrayRef<Instruction *> Chain) {
647  SmallVector<Instruction *, 16> Instrs;
648  for (Instruction *I : Chain) {
649  Value *PtrOperand = getLoadStorePointerOperand(I);
650  assert(PtrOperand && "Instruction must have a pointer operand.");
651  Instrs.push_back(I);
652  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(PtrOperand))
653  Instrs.push_back(GEP);
654  }
655 
656  // Erase instructions.
657  for (Instruction *I : Instrs)
658  if (I->use_empty())
659  I->eraseFromParent();
660 }
661 
662 std::pair<ArrayRef<Instruction *>, ArrayRef<Instruction *>>
663 Vectorizer::splitOddVectorElts(ArrayRef<Instruction *> Chain,
664  unsigned ElementSizeBits) {
665  unsigned ElementSizeBytes = ElementSizeBits / 8;
666  unsigned SizeBytes = ElementSizeBytes * Chain.size();
667  unsigned NumLeft = (SizeBytes - (SizeBytes % 4)) / ElementSizeBytes;
668  if (NumLeft == Chain.size()) {
669  if ((NumLeft & 1) == 0)
670  NumLeft /= 2; // Split even in half
671  else
672  --NumLeft; // Split off last element
673  } else if (NumLeft == 0)
674  NumLeft = 1;
675  return std::make_pair(Chain.slice(0, NumLeft), Chain.slice(NumLeft));
676 }
677 
678 ArrayRef<Instruction *>
679 Vectorizer::getVectorizablePrefix(ArrayRef<Instruction *> Chain) {
680  // These are in BB order, unlike Chain, which is in address order.
681  SmallVector<Instruction *, 16> MemoryInstrs;
682  SmallVector<Instruction *, 16> ChainInstrs;
683 
684  bool IsLoadChain = isa<LoadInst>(Chain[0]);
685  LLVM_DEBUG({
686  for (Instruction *I : Chain) {
687  if (IsLoadChain)
688  assert(isa<LoadInst>(I) &&
689  "All elements of Chain must be loads, or all must be stores.");
690  else
691  assert(isa<StoreInst>(I) &&
692  "All elements of Chain must be loads, or all must be stores.");
693  }
694  });
695 
696  for (Instruction &I : make_range(getBoundaryInstrs(Chain))) {
697  if ((isa<LoadInst>(I) || isa<StoreInst>(I)) && is_contained(Chain, &I)) {
698  ChainInstrs.push_back(&I);
699  continue;
700  }
701  if (I.mayThrow()) {
702  LLVM_DEBUG(dbgs() << "LSV: Found may-throw operation: " << I << '\n');
703  break;
704  }
705  if (I.mayReadOrWriteMemory())
706  MemoryInstrs.push_back(&I);
707  }
708 
709  // Loop until we find an instruction in ChainInstrs that we can't vectorize.
710  unsigned ChainInstrIdx = 0;
711  Instruction *BarrierMemoryInstr = nullptr;
712 
713  for (unsigned E = ChainInstrs.size(); ChainInstrIdx < E; ++ChainInstrIdx) {
714  Instruction *ChainInstr = ChainInstrs[ChainInstrIdx];
715 
716  // If a barrier memory instruction was found, chain instructions that follow
717  // will not be added to the valid prefix.
718  if (BarrierMemoryInstr && BarrierMemoryInstr->comesBefore(ChainInstr))
719  break;
720 
721  // Check (in BB order) if any instruction prevents ChainInstr from being
722  // vectorized. Find and store the first such "conflicting" instruction.
723  for (Instruction *MemInstr : MemoryInstrs) {
724  // If a barrier memory instruction was found, do not check past it.
725  if (BarrierMemoryInstr && BarrierMemoryInstr->comesBefore(MemInstr))
726  break;
727 
728  auto *MemLoad = dyn_cast<LoadInst>(MemInstr);
729  auto *ChainLoad = dyn_cast<LoadInst>(ChainInstr);
730  if (MemLoad && ChainLoad)
731  continue;
732 
733  // We can ignore the alias if we have a load/store pair and the load
734  // is known to be invariant. The load cannot be clobbered by the store.
735  auto IsInvariantLoad = [](const LoadInst *LI) -> bool {
736  return LI->hasMetadata(LLVMContext::MD_invariant_load);
737  };
738 
739  if (IsLoadChain) {
740  // We can ignore the alias as long as the load comes before the store,
741  // because that means we won't be moving the load past the store to
742  // vectorize it (the vectorized load is inserted at the location of the
743  // first load in the chain).
744  if (ChainInstr->comesBefore(MemInstr) ||
745  (ChainLoad && IsInvariantLoad(ChainLoad)))
746  continue;
747  } else {
748  // Same case, but in reverse.
749  if (MemInstr->comesBefore(ChainInstr) ||
750  (MemLoad && IsInvariantLoad(MemLoad)))
751  continue;
752  }
753 
754  ModRefInfo MR =
755  AA.getModRefInfo(MemInstr, MemoryLocation::get(ChainInstr));
756  if (IsLoadChain ? isModSet(MR) : isModOrRefSet(MR)) {
757  LLVM_DEBUG({
758  dbgs() << "LSV: Found alias:\n"
759  " Aliasing instruction:\n"
760  << " " << *MemInstr << '\n'
761  << " Aliased instruction and pointer:\n"
762  << " " << *ChainInstr << '\n'
763  << " " << *getLoadStorePointerOperand(ChainInstr) << '\n';
764  });
765  // Save this aliasing memory instruction as a barrier, but allow other
766  // instructions that precede the barrier to be vectorized with this one.
767  BarrierMemoryInstr = MemInstr;
768  break;
769  }
770  }
771  // Continue the search only for store chains, since vectorizing stores that
772  // precede an aliasing load is valid. Conversely, vectorizing loads is valid
773  // up to an aliasing store, but should not pull loads from further down in
774  // the basic block.
775  if (IsLoadChain && BarrierMemoryInstr) {
776  // The BarrierMemoryInstr is a store that precedes ChainInstr.
777  assert(BarrierMemoryInstr->comesBefore(ChainInstr));
778  break;
779  }
780  }
781 
782  // Find the largest prefix of Chain whose elements are all in
783  // ChainInstrs[0, ChainInstrIdx). This is the largest vectorizable prefix of
784  // Chain. (Recall that Chain is in address order, but ChainInstrs is in BB
785  // order.)
786  SmallPtrSet<Instruction *, 8> VectorizableChainInstrs(
787  ChainInstrs.begin(), ChainInstrs.begin() + ChainInstrIdx);
788  unsigned ChainIdx = 0;
789  for (unsigned ChainLen = Chain.size(); ChainIdx < ChainLen; ++ChainIdx) {
790  if (!VectorizableChainInstrs.count(Chain[ChainIdx]))
791  break;
792  }
793  return Chain.slice(0, ChainIdx);
794 }
795 
796 static ChainID getChainID(const Value *Ptr) {
797  const Value *ObjPtr = getUnderlyingObject(Ptr);
798  if (const auto *Sel = dyn_cast<SelectInst>(ObjPtr)) {
799  // The select's themselves are distinct instructions even if they share the
800  // same condition and evaluate to consecutive pointers for true and false
801  // values of the condition. Therefore using the select's themselves for
802  // grouping instructions would put consecutive accesses into different lists
803  // and they won't be even checked for being consecutive, and won't be
804  // vectorized.
805  return Sel->getCondition();
806  }
807  return ObjPtr;
808 }
809 
810 std::pair<InstrListMap, InstrListMap>
811 Vectorizer::collectInstructions(BasicBlock *BB) {
812  InstrListMap LoadRefs;
813  InstrListMap StoreRefs;
814 
815  for (Instruction &I : *BB) {
816  if (!I.mayReadOrWriteMemory())
817  continue;
818 
819  if (LoadInst *LI = dyn_cast<LoadInst>(&I)) {
820  if (!LI->isSimple())
821  continue;
822 
823  // Skip if it's not legal.
824  if (!TTI.isLegalToVectorizeLoad(LI))
825  continue;
826 
827  Type *Ty = LI->getType();
828  if (!VectorType::isValidElementType(Ty->getScalarType()))
829  continue;
830 
831  // Skip weird non-byte sizes. They probably aren't worth the effort of
832  // handling correctly.
833  unsigned TySize = DL.getTypeSizeInBits(Ty);
834  if ((TySize % 8) != 0)
835  continue;
836 
837  // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
838  // functions are currently using an integer type for the vectorized
839  // load/store, and do not support casting between the integer type and a
840  // vector of pointers (e.g. i64 to <2 x i16*>).
841  if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
842  continue;
843 
844  Value *Ptr = LI->getPointerOperand();
845  unsigned AS = Ptr->getType()->getPointerAddressSpace();
846  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
847 
848  unsigned VF = VecRegSize / TySize;
849  VectorType *VecTy = dyn_cast<VectorType>(Ty);
850 
851  // No point in looking at these if they're too big to vectorize.
852  if (TySize > VecRegSize / 2 ||
853  (VecTy && TTI.getLoadVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
854  continue;
855 
856  // Make sure all the users of a vector are constant-index extracts.
857  if (isa<VectorType>(Ty) && !llvm::all_of(LI->users(), [](const User *U) {
858  const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
859  return EEI && isa<ConstantInt>(EEI->getOperand(1));
860  }))
861  continue;
862 
863  // Save the load locations.
864  const ChainID ID = getChainID(Ptr);
865  LoadRefs[ID].push_back(LI);
866  } else if (StoreInst *SI = dyn_cast<StoreInst>(&I)) {
867  if (!SI->isSimple())
868  continue;
869 
870  // Skip if it's not legal.
871  if (!TTI.isLegalToVectorizeStore(SI))
872  continue;
873 
874  Type *Ty = SI->getValueOperand()->getType();
875  if (!VectorType::isValidElementType(Ty->getScalarType()))
876  continue;
877 
878  // Skip vectors of pointers. The vectorizeLoadChain/vectorizeStoreChain
879  // functions are currently using an integer type for the vectorized
880  // load/store, and do not support casting between the integer type and a
881  // vector of pointers (e.g. i64 to <2 x i16*>).
882  if (Ty->isVectorTy() && Ty->isPtrOrPtrVectorTy())
883  continue;
884 
885  // Skip weird non-byte sizes. They probably aren't worth the effort of
886  // handling correctly.
887  unsigned TySize = DL.getTypeSizeInBits(Ty);
888  if ((TySize % 8) != 0)
889  continue;
890 
891  Value *Ptr = SI->getPointerOperand();
892  unsigned AS = Ptr->getType()->getPointerAddressSpace();
893  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
894 
895  unsigned VF = VecRegSize / TySize;
896  VectorType *VecTy = dyn_cast<VectorType>(Ty);
897 
898  // No point in looking at these if they're too big to vectorize.
899  if (TySize > VecRegSize / 2 ||
900  (VecTy && TTI.getStoreVectorFactor(VF, TySize, TySize / 8, VecTy) == 0))
901  continue;
902 
903  if (isa<VectorType>(Ty) && !llvm::all_of(SI->users(), [](const User *U) {
904  const ExtractElementInst *EEI = dyn_cast<ExtractElementInst>(U);
905  return EEI && isa<ConstantInt>(EEI->getOperand(1));
906  }))
907  continue;
908 
909  // Save store location.
910  const ChainID ID = getChainID(Ptr);
911  StoreRefs[ID].push_back(SI);
912  }
913  }
914 
915  return {LoadRefs, StoreRefs};
916 }
917 
918 bool Vectorizer::vectorizeChains(InstrListMap &Map) {
919  bool Changed = false;
920 
921  for (const std::pair<ChainID, InstrList> &Chain : Map) {
922  unsigned Size = Chain.second.size();
923  if (Size < 2)
924  continue;
925 
926  LLVM_DEBUG(dbgs() << "LSV: Analyzing a chain of length " << Size << ".\n");
927 
928  // Process the instructions in chunks of 64.
929  for (unsigned CI = 0, CE = Size; CI < CE; CI += 64) {
930  unsigned Len = std::min<unsigned>(CE - CI, 64);
931  ArrayRef<Instruction *> Chunk(&Chain.second[CI], Len);
932  Changed |= vectorizeInstructions(Chunk);
933  }
934  }
935 
936  return Changed;
937 }
938 
939 bool Vectorizer::vectorizeInstructions(ArrayRef<Instruction *> Instrs) {
940  LLVM_DEBUG(dbgs() << "LSV: Vectorizing " << Instrs.size()
941  << " instructions.\n");
942  SmallVector<int, 16> Heads, Tails;
943  int ConsecutiveChain[64];
944 
945  // Do a quadratic search on all of the given loads/stores and find all of the
946  // pairs of loads/stores that follow each other.
947  for (int i = 0, e = Instrs.size(); i < e; ++i) {
948  ConsecutiveChain[i] = -1;
949  for (int j = e - 1; j >= 0; --j) {
950  if (i == j)
951  continue;
952 
953  if (isConsecutiveAccess(Instrs[i], Instrs[j])) {
954  if (ConsecutiveChain[i] != -1) {
955  int CurDistance = std::abs(ConsecutiveChain[i] - i);
956  int NewDistance = std::abs(ConsecutiveChain[i] - j);
957  if (j < i || NewDistance > CurDistance)
958  continue; // Should not insert.
959  }
960 
961  Tails.push_back(j);
962  Heads.push_back(i);
963  ConsecutiveChain[i] = j;
964  }
965  }
966  }
967 
968  bool Changed = false;
969  SmallPtrSet<Instruction *, 16> InstructionsProcessed;
970 
971  for (int Head : Heads) {
972  if (InstructionsProcessed.count(Instrs[Head]))
973  continue;
974  bool LongerChainExists = false;
975  for (unsigned TIt = 0; TIt < Tails.size(); TIt++)
976  if (Head == Tails[TIt] &&
977  !InstructionsProcessed.count(Instrs[Heads[TIt]])) {
978  LongerChainExists = true;
979  break;
980  }
981  if (LongerChainExists)
982  continue;
983 
984  // We found an instr that starts a chain. Now follow the chain and try to
985  // vectorize it.
986  SmallVector<Instruction *, 16> Operands;
987  int I = Head;
988  while (I != -1 && (is_contained(Tails, I) || is_contained(Heads, I))) {
989  if (InstructionsProcessed.count(Instrs[I]))
990  break;
991 
992  Operands.push_back(Instrs[I]);
993  I = ConsecutiveChain[I];
994  }
995 
996  bool Vectorized = false;
997  if (isa<LoadInst>(*Operands.begin()))
998  Vectorized = vectorizeLoadChain(Operands, &InstructionsProcessed);
999  else
1000  Vectorized = vectorizeStoreChain(Operands, &InstructionsProcessed);
1001 
1002  Changed |= Vectorized;
1003  }
1004 
1005  return Changed;
1006 }
1007 
1008 bool Vectorizer::vectorizeStoreChain(
1009  ArrayRef<Instruction *> Chain,
1010  SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
1011  StoreInst *S0 = cast<StoreInst>(Chain[0]);
1012 
1013  // If the vector has an int element, default to int for the whole store.
1014  Type *StoreTy = nullptr;
1015  for (Instruction *I : Chain) {
1016  StoreTy = cast<StoreInst>(I)->getValueOperand()->getType();
1017  if (StoreTy->isIntOrIntVectorTy())
1018  break;
1019 
1020  if (StoreTy->isPtrOrPtrVectorTy()) {
1021  StoreTy = Type::getIntNTy(F.getParent()->getContext(),
1022  DL.getTypeSizeInBits(StoreTy));
1023  break;
1024  }
1025  }
1026  assert(StoreTy && "Failed to find store type");
1027 
1028  unsigned Sz = DL.getTypeSizeInBits(StoreTy);
1029  unsigned AS = S0->getPointerAddressSpace();
1030  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
1031  unsigned VF = VecRegSize / Sz;
1032  unsigned ChainSize = Chain.size();
1033  Align Alignment = S0->getAlign();
1034 
1035  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
1036  InstructionsProcessed->insert(Chain.begin(), Chain.end());
1037  return false;
1038  }
1039 
1040  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
1041  if (NewChain.empty()) {
1042  // No vectorization possible.
1043  InstructionsProcessed->insert(Chain.begin(), Chain.end());
1044  return false;
1045  }
1046  if (NewChain.size() == 1) {
1047  // Failed after the first instruction. Discard it and try the smaller chain.
1048  InstructionsProcessed->insert(NewChain.front());
1049  return false;
1050  }
1051 
1052  // Update Chain to the valid vectorizable subchain.
1053  Chain = NewChain;
1054  ChainSize = Chain.size();
1055 
1056  // Check if it's legal to vectorize this chain. If not, split the chain and
1057  // try again.
1058  unsigned EltSzInBytes = Sz / 8;
1059  unsigned SzInBytes = EltSzInBytes * ChainSize;
1060 
1061  FixedVectorType *VecTy;
1062  auto *VecStoreTy = dyn_cast<FixedVectorType>(StoreTy);
1063  if (VecStoreTy)
1064  VecTy = FixedVectorType::get(StoreTy->getScalarType(),
1065  Chain.size() * VecStoreTy->getNumElements());
1066  else
1067  VecTy = FixedVectorType::get(StoreTy, Chain.size());
1068 
1069  // If it's more than the max vector size or the target has a better
1070  // vector factor, break it into two pieces.
1071  unsigned TargetVF = TTI.getStoreVectorFactor(VF, Sz, SzInBytes, VecTy);
1072  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
1073  LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
1074  " Creating two separate arrays.\n");
1075  return vectorizeStoreChain(Chain.slice(0, TargetVF),
1076  InstructionsProcessed) |
1077  vectorizeStoreChain(Chain.slice(TargetVF), InstructionsProcessed);
1078  }
1079 
1080  LLVM_DEBUG({
1081  dbgs() << "LSV: Stores to vectorize:\n";
1082  for (Instruction *I : Chain)
1083  dbgs() << " " << *I << "\n";
1084  });
1085 
1086  // We won't try again to vectorize the elements of the chain, regardless of
1087  // whether we succeed below.
1088  InstructionsProcessed->insert(Chain.begin(), Chain.end());
1089 
1090  // If the store is going to be misaligned, don't vectorize it.
1091  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
1092  if (S0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
1093  auto Chains = splitOddVectorElts(Chain, Sz);
1094  return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
1095  vectorizeStoreChain(Chains.second, InstructionsProcessed);
1096  }
1097 
1098  Align NewAlign = getOrEnforceKnownAlignment(S0->getPointerOperand(),
1099  Align(StackAdjustedAlignment),
1100  DL, S0, nullptr, &DT);
1101  if (NewAlign >= Alignment)
1102  Alignment = NewAlign;
1103  else
1104  return false;
1105  }
1106 
1107  if (!TTI.isLegalToVectorizeStoreChain(SzInBytes, Alignment, AS)) {
1108  auto Chains = splitOddVectorElts(Chain, Sz);
1109  return vectorizeStoreChain(Chains.first, InstructionsProcessed) |
1110  vectorizeStoreChain(Chains.second, InstructionsProcessed);
1111  }
1112 
1113  BasicBlock::iterator First, Last;
1114  std::tie(First, Last) = getBoundaryInstrs(Chain);
1115  Builder.SetInsertPoint(&*Last);
1116 
1117  Value *Vec = UndefValue::get(VecTy);
1118 
1119  if (VecStoreTy) {
1120  unsigned VecWidth = VecStoreTy->getNumElements();
1121  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1122  StoreInst *Store = cast<StoreInst>(Chain[I]);
1123  for (unsigned J = 0, NE = VecStoreTy->getNumElements(); J != NE; ++J) {
1124  unsigned NewIdx = J + I * VecWidth;
1125  Value *Extract = Builder.CreateExtractElement(Store->getValueOperand(),
1126  Builder.getInt32(J));
1127  if (Extract->getType() != StoreTy->getScalarType())
1128  Extract = Builder.CreateBitCast(Extract, StoreTy->getScalarType());
1129 
1130  Value *Insert =
1131  Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(NewIdx));
1132  Vec = Insert;
1133  }
1134  }
1135  } else {
1136  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1137  StoreInst *Store = cast<StoreInst>(Chain[I]);
1138  Value *Extract = Store->getValueOperand();
1139  if (Extract->getType() != StoreTy->getScalarType())
1140  Extract =
1141  Builder.CreateBitOrPointerCast(Extract, StoreTy->getScalarType());
1142 
1143  Value *Insert =
1144  Builder.CreateInsertElement(Vec, Extract, Builder.getInt32(I));
1145  Vec = Insert;
1146  }
1147  }
1148 
1149  StoreInst *SI = Builder.CreateAlignedStore(
1150  Vec,
1151  Builder.CreateBitCast(S0->getPointerOperand(), VecTy->getPointerTo(AS)),
1152  Alignment);
1153  propagateMetadata(SI, Chain);
1154 
1155  eraseInstructions(Chain);
1156  ++NumVectorInstructions;
1157  NumScalarsVectorized += Chain.size();
1158  return true;
1159 }
1160 
1161 bool Vectorizer::vectorizeLoadChain(
1162  ArrayRef<Instruction *> Chain,
1163  SmallPtrSet<Instruction *, 16> *InstructionsProcessed) {
1164  LoadInst *L0 = cast<LoadInst>(Chain[0]);
1165 
1166  // If the vector has an int element, default to int for the whole load.
1167  Type *LoadTy = nullptr;
1168  for (const auto &V : Chain) {
1169  LoadTy = cast<LoadInst>(V)->getType();
1170  if (LoadTy->isIntOrIntVectorTy())
1171  break;
1172 
1173  if (LoadTy->isPtrOrPtrVectorTy()) {
1174  LoadTy = Type::getIntNTy(F.getParent()->getContext(),
1175  DL.getTypeSizeInBits(LoadTy));
1176  break;
1177  }
1178  }
1179  assert(LoadTy && "Can't determine LoadInst type from chain");
1180 
1181  unsigned Sz = DL.getTypeSizeInBits(LoadTy);
1182  unsigned AS = L0->getPointerAddressSpace();
1183  unsigned VecRegSize = TTI.getLoadStoreVecRegBitWidth(AS);
1184  unsigned VF = VecRegSize / Sz;
1185  unsigned ChainSize = Chain.size();
1186  Align Alignment = L0->getAlign();
1187 
1188  if (!isPowerOf2_32(Sz) || VF < 2 || ChainSize < 2) {
1189  InstructionsProcessed->insert(Chain.begin(), Chain.end());
1190  return false;
1191  }
1192 
1193  ArrayRef<Instruction *> NewChain = getVectorizablePrefix(Chain);
1194  if (NewChain.empty()) {
1195  // No vectorization possible.
1196  InstructionsProcessed->insert(Chain.begin(), Chain.end());
1197  return false;
1198  }
1199  if (NewChain.size() == 1) {
1200  // Failed after the first instruction. Discard it and try the smaller chain.
1201  InstructionsProcessed->insert(NewChain.front());
1202  return false;
1203  }
1204 
1205  // Update Chain to the valid vectorizable subchain.
1206  Chain = NewChain;
1207  ChainSize = Chain.size();
1208 
1209  // Check if it's legal to vectorize this chain. If not, split the chain and
1210  // try again.
1211  unsigned EltSzInBytes = Sz / 8;
1212  unsigned SzInBytes = EltSzInBytes * ChainSize;
1213  VectorType *VecTy;
1214  auto *VecLoadTy = dyn_cast<FixedVectorType>(LoadTy);
1215  if (VecLoadTy)
1216  VecTy = FixedVectorType::get(LoadTy->getScalarType(),
1217  Chain.size() * VecLoadTy->getNumElements());
1218  else
1219  VecTy = FixedVectorType::get(LoadTy, Chain.size());
1220 
1221  // If it's more than the max vector size or the target has a better
1222  // vector factor, break it into two pieces.
1223  unsigned TargetVF = TTI.getLoadVectorFactor(VF, Sz, SzInBytes, VecTy);
1224  if (ChainSize > VF || (VF != TargetVF && TargetVF < ChainSize)) {
1225  LLVM_DEBUG(dbgs() << "LSV: Chain doesn't match with the vector factor."
1226  " Creating two separate arrays.\n");
1227  return vectorizeLoadChain(Chain.slice(0, TargetVF), InstructionsProcessed) |
1228  vectorizeLoadChain(Chain.slice(TargetVF), InstructionsProcessed);
1229  }
1230 
1231  // We won't try again to vectorize the elements of the chain, regardless of
1232  // whether we succeed below.
1233  InstructionsProcessed->insert(Chain.begin(), Chain.end());
1234 
1235  // If the load is going to be misaligned, don't vectorize it.
1236  if (accessIsMisaligned(SzInBytes, AS, Alignment)) {
1237  if (L0->getPointerAddressSpace() != DL.getAllocaAddrSpace()) {
1238  auto Chains = splitOddVectorElts(Chain, Sz);
1239  return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
1240  vectorizeLoadChain(Chains.second, InstructionsProcessed);
1241  }
1242 
1243  Align NewAlign = getOrEnforceKnownAlignment(L0->getPointerOperand(),
1244  Align(StackAdjustedAlignment),
1245  DL, L0, nullptr, &DT);
1246  if (NewAlign >= Alignment)
1247  Alignment = NewAlign;
1248  else
1249  return false;
1250  }
1251 
1252  if (!TTI.isLegalToVectorizeLoadChain(SzInBytes, Alignment, AS)) {
1253  auto Chains = splitOddVectorElts(Chain, Sz);
1254  return vectorizeLoadChain(Chains.first, InstructionsProcessed) |
1255  vectorizeLoadChain(Chains.second, InstructionsProcessed);
1256  }
1257 
1258  LLVM_DEBUG({
1259  dbgs() << "LSV: Loads to vectorize:\n";
1260  for (Instruction *I : Chain)
1261  I->dump();
1262  });
1263 
1264  // getVectorizablePrefix already computed getBoundaryInstrs. The value of
1265  // Last may have changed since then, but the value of First won't have. If it
1266  // matters, we could compute getBoundaryInstrs only once and reuse it here.
1267  BasicBlock::iterator First, Last;
1268  std::tie(First, Last) = getBoundaryInstrs(Chain);
1269  Builder.SetInsertPoint(&*First);
1270 
1271  Value *Bitcast =
1272  Builder.CreateBitCast(L0->getPointerOperand(), VecTy->getPointerTo(AS));
1273  LoadInst *LI =
1274  Builder.CreateAlignedLoad(VecTy, Bitcast, MaybeAlign(Alignment));
1275  propagateMetadata(LI, Chain);
1276 
1277  if (VecLoadTy) {
1278  SmallVector<Instruction *, 16> InstrsToErase;
1279 
1280  unsigned VecWidth = VecLoadTy->getNumElements();
1281  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1282  for (auto Use : Chain[I]->users()) {
1283  // All users of vector loads are ExtractElement instructions with
1284  // constant indices, otherwise we would have bailed before now.
1285  Instruction *UI = cast<Instruction>(Use);
1286  unsigned Idx = cast<ConstantInt>(UI->getOperand(1))->getZExtValue();
1287  unsigned NewIdx = Idx + I * VecWidth;
1288  Value *V = Builder.CreateExtractElement(LI, Builder.getInt32(NewIdx),
1289  UI->getName());
1290  if (V->getType() != UI->getType())
1291  V = Builder.CreateBitCast(V, UI->getType());
1292 
1293  // Replace the old instruction.
1294  UI->replaceAllUsesWith(V);
1295  InstrsToErase.push_back(UI);
1296  }
1297  }
1298 
1299  // Bitcast might not be an Instruction, if the pointer operand being cast
1300  // is a constant. In that case, no need to reorder anything.
1301  if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
1302  reorder(BitcastInst);
1303 
1304  for (auto I : InstrsToErase)
1305  I->eraseFromParent();
1306  } else {
1307  for (unsigned I = 0, E = Chain.size(); I != E; ++I) {
1308  Value *CV = Chain[I];
1309  Value *V =
1310  Builder.CreateExtractElement(LI, Builder.getInt32(I), CV->getName());
1311  if (V->getType() != CV->getType()) {
1312  V = Builder.CreateBitOrPointerCast(V, CV->getType());
1313  }
1314 
1315  // Replace the old instruction.
1316  CV->replaceAllUsesWith(V);
1317  }
1318 
1319  if (Instruction *BitcastInst = dyn_cast<Instruction>(Bitcast))
1320  reorder(BitcastInst);
1321  }
1322 
1323  eraseInstructions(Chain);
1324 
1325  ++NumVectorInstructions;
1326  NumScalarsVectorized += Chain.size();
1327  return true;
1328 }
1329 
1330 bool Vectorizer::accessIsMisaligned(unsigned SzInBytes, unsigned AddressSpace,
1331  Align Alignment) {
1332  if (Alignment.value() % SzInBytes == 0)
1333  return false;
1334 
1335  bool Fast = false;
1336  bool Allows = TTI.allowsMisalignedMemoryAccesses(F.getParent()->getContext(),
1337  SzInBytes * 8, AddressSpace,
1338  Alignment, &Fast);
1339  LLVM_DEBUG(dbgs() << "LSV: Target said misaligned is allowed? " << Allows
1340  << " and fast? " << Fast << "\n";);
1341  return !Allows || !Fast;
1342 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:73
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:155
llvm::ScalarEvolution::getContext
LLVMContext & getContext() const
Definition: ScalarEvolution.h:503
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1233
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2331
Signed
@ Signed
Definition: NVPTXISelLowering.cpp:4636
llvm::ScalarEvolutionAnalysis
Analysis pass that exposes the ScalarEvolution for a function.
Definition: ScalarEvolution.h:2036
MathExtras.h
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:37
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
llvm::initializeLoadStoreVectorizerLegacyPassPass
void initializeLoadStoreVectorizerLegacyPassPass(PassRegistry &)
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::AArch64CC::NE
@ NE
Definition: AArch64BaseInfo.h:256
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::generic_gep_type_iterator
Definition: GetElementPtrTypeIterator.h:31
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:90
llvm::isModOrRefSet
LLVM_NODISCARD bool isModOrRefSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:189
IntrinsicInst.h
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:779
checkNoWrapFlags
static bool checkNoWrapFlags(Instruction *I, bool Signed)
Definition: LoadStoreVectorizer.cpp:387
llvm::Function
Definition: Function.h:61
Pass.h
store
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM ID Predecessors according to mbb< bb27, 0x8b0a7c0 > Note ADDri is not a two address instruction its result reg1037 is an operand of the PHI node in bb76 and its operand reg1039 is the result of the PHI node We should treat it as a two address code and make sure the ADDri is scheduled after any node that reads reg1039 Use info(i.e. register scavenger) to assign it a free register to allow reuse the collector could move the objects and invalidate the derived pointer This is bad enough in the first but safe points can crop up unpredictably **array_addr i32 n y store obj obj **nth_el If the i64 division is lowered to a then a safe point array and nth_el no longer point into the correct object The fix for this is to copy address calculations so that dependent pointers are never live across safe point boundaries But the loads cannot be copied like this if there was an intervening store
Definition: README.txt:133
llvm::Type::getScalarType
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:319
llvm::SmallVector< Instruction *, 8 >
Statistic.h
llvm::APInt::getSExtValue
int64_t getSExtValue() const
Get sign extended value.
Definition: APInt.h:1467
llvm::LegacyLegalizeActions::Bitcast
@ Bitcast
Perform the operation on a different, but equivalently sized type.
Definition: LegacyLegalizerInfo.h:54
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:168
llvm::Type::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
Definition: DerivedTypes.h:734
llvm::IRBuilder<>
MapVector.h
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::ScalarEvolution
The main scalar evolution driver.
Definition: ScalarEvolution.h:461
llvm::Instruction::hasNoUnsignedWrap
bool hasNoUnsignedWrap() const
Determine whether the no unsigned wrap flag is set.
Definition: Instruction.cpp:136
ValueTracking.h
Local.h
llvm::Instruction::insertBefore
void insertBefore(Instruction *InsertPos)
Insert an unlinked instruction into a basic block immediately before the specified instruction.
Definition: Instruction.cpp:84
llvm::LoadStoreVectorizerPass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: LoadStoreVectorizer.cpp:255
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:151
APInt.h
llvm::getLoadStoreType
Type * getLoadStoreType(Value *I)
A helper function that returns the type of a load or store instruction.
Definition: Instructions.h:5346
llvm::Depth
@ Depth
Definition: SIMachineScheduler.h:36
ScalarEvolution.h
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(LoadStoreVectorizerLegacyPass, DEBUG_TYPE, "Vectorize load and Store instructions", false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1403
Module.h
llvm::getOrEnforceKnownAlignment
Align getOrEnforceKnownAlignment(Value *V, MaybeAlign PrefAlign, const DataLayout &DL, const Instruction *CxtI=nullptr, AssumptionCache *AC=nullptr, const DominatorTree *DT=nullptr)
Try to ensure that the alignment of V is at least PrefAlign bytes.
Definition: Local.cpp:1343
llvm::Instruction::comesBefore
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
Definition: Instruction.cpp:111
llvm::isConsecutiveAccess
bool isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL, ScalarEvolution &SE, bool CheckType=true)
Returns true if the memory operations A and B are consecutive.
Definition: LoopAccessAnalysis.cpp:1253
llvm::MapVector
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
llvm::SmallPtrSet< Instruction *, 16 >
STLExtras.h
llvm::SmallVectorImpl::pop_back_val
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:635
llvm::propagateMetadata
Instruction * propagateMetadata(Instruction *I, ArrayRef< Value * > VL)
Specifically, let Kinds = [MD_tbaa, MD_alias_scope, MD_noalias, MD_fpmath, MD_nontemporal,...
Definition: VectorUtils.cpp:726
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:267
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
llvm::gep_type_begin
gep_type_iterator gep_type_begin(const User *GEP)
Definition: GetElementPtrTypeIterator.h:139
llvm::Sched::Fast
@ Fast
Definition: TargetLowering.h:105
and
We currently generate a but we really shouldn eax ecx xorl edx divl ecx eax divl ecx movl eax ret A similar code sequence works for division We currently compile i32 v2 eax eax jo LBB1_2 and
Definition: README.txt:1271
llvm::FixedVectorType
Class to represent fixed width SIMD vectors.
Definition: DerivedTypes.h:525
llvm::LoadInst::getAlign
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:223
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:56
KnownBits.h
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:58
AliasAnalysis.h
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
checkIfSafeAddSequence
static bool checkIfSafeAddSequence(const APInt &IdxDiff, Instruction *AddOpA, unsigned MatchingOpIdxA, Instruction *AddOpB, unsigned MatchingOpIdxB, bool Signed)
Definition: LoadStoreVectorizer.cpp:393
Instruction.h
llvm::Instruction::getOpcode
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
Definition: Instruction.h:160
llvm::all_of
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1551
llvm::APInt::isNegative
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:314
llvm::TargetTransformInfo::isLegalToVectorizeLoad
bool isLegalToVectorizeLoad(LoadInst *LI) const
Definition: TargetTransformInfo.cpp:987
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:31
Constants.h
llvm::AAResults
Definition: AliasAnalysis.h:456
E
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
llvm::User
Definition: User.h:44
InstrTypes.h
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
llvm::generic_gep_type_iterator::getOperand
Value * getOperand() const
Definition: GetElementPtrTypeIterator.h:78
llvm::Type::isVectorTy
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:237
false
Definition: StackSlotColoring.cpp:142
llvm::MaybeAlign
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::Instruction
Definition: Instruction.h:45
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:153
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:287
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::UndefValue::get
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1771
llvm::getUnderlyingObject
const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=6)
This method strips off any GEP address adjustments and pointer casts from the specified value, returning the original object being addressed.
Definition: ValueTracking.cpp:4378
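A sketch of a typical use; haveSameUnderlyingObject is a hypothetical helper.
static bool haveSameUnderlyingObject(const Value *PtrA, const Value *PtrB) {
  // Strip GEPs and pointer casts from both pointers; only accesses that
  // resolve to the same underlying object are candidates for one chain.
  return getUnderlyingObject(PtrA) == getUnderlyingObject(PtrB);
}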
llvm::ScalarEvolutionWrapperPass
Definition: ScalarEvolution.h:2066
llvm::createLoadStoreVectorizerPass
Pass * createLoadStoreVectorizerPass()
Create a legacy pass manager instance of the LoadStoreVectorizer pass.
SmallPtrSet.h
llvm::Instruction::removeFromParent
void removeFromParent()
This method unlinks 'this' from the containing basic block, but does not delete it.
Definition: Instruction.cpp:74
llvm::APInt::sle
bool sle(const APInt &RHS) const
Signed less or equal comparison.
Definition: APInt.h:1091
llvm::FixedVectorType::get
static FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition: Type.cpp:648
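A sketch of building the widened type for N consecutive scalar accesses; getWideType is an illustrative helper, not LLVM API.
static Type *getWideType(Type *EltTy, unsigned NumElts) {
  // E.g. EltTy = i32, NumElts = 4 yields <4 x i32>.
  assert(VectorType::isValidElementType(EltTy) && "unsupported element type");
  return FixedVectorType::get(EltTy, NumElts);
}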
llvm::StoreInst::getAlign
Align getAlign() const
Definition: Instructions.h:353
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::isModSet
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:196
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::ArrayRef::slice
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
Type.h
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
Operands
mir Rename Register Operands
Definition: MIRNamerPass.cpp:78
llvm::ScalarEvolution::getSCEV
const SCEV * getSCEV(Value *V)
Return a SCEV expression for the full generality of the specified expression.
Definition: ScalarEvolution.cpp:4066
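A sketch of one way ScalarEvolution can decide whether two pointers are a fixed number of bytes apart, also using the getConstant and getAddExpr entries elsewhere in this index; isExactlyDistApart is a hypothetical wrapper.
static bool isExactlyDistApart(Value *PtrA, Value *PtrB, uint64_t DistBytes,
                               ScalarEvolution &SE) {
  const SCEV *A = SE.getSCEV(PtrA);
  const SCEV *B = SE.getSCEV(PtrB);
  // Express DistBytes in the integer type SCEV uses for PtrA's pointer type.
  const SCEV *Off =
      SE.getConstant(SE.getEffectiveSCEVType(PtrA->getType()), DistBytes);
  // SCEV expressions are uniqued, so pointer equality means structural
  // equality; a mismatch is only a conservative "don't know".
  return SE.getAddExpr(A, Off) == B;
}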
getChainID
static ChainID getChainID(const Value *Ptr)
Definition: LoadStoreVectorizer.cpp:796
llvm::VectorType::isValidElementType
static bool isValidElementType(Type *ElemTy)
Return true if the specified type is valid as an element type.
Definition: Type.cpp:639
llvm::VectorType
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
VectorUtils.h
BasicBlock.h
LoadStoreVectorizer.h
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:273
llvm::SCEV
This class represents an analyzed expression in the program.
Definition: ScalarEvolution.h:78
llvm::instructions
inst_range instructions(Function *F)
Definition: InstIterator.h:133
llvm::StoreInst
An instruction for storing to memory.
Definition: Instructions.h:304
llvm::SCEVAAWrapperPass
Legacy wrapper pass to provide the SCEVAAResult object.
Definition: ScalarEvolutionAliasAnalysis.h:55
uint64_t
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2387
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:169
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
StackAdjustedAlignment
static const unsigned StackAdjustedAlignment
Definition: LoadStoreVectorizer.cpp:97
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
MemoryLocation.h
llvm::APInt::negate
void negate()
Negate this APInt in place.
Definition: APInt.h:1385
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:928
llvm::is_contained
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1616
ArrayRef.h
llvm::computeKnownBits
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, OptimizationRemarkEmitter *ORE=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOne bit sets.
Definition: ValueTracking.cpp:213
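A sketch of using known-bits to prove extra pointer alignment; hasLowBitsClear and its parameter names are illustrative.
static bool hasLowBitsClear(const Value *Addr, unsigned NumLowBits,
                            const DataLayout &DL, AssumptionCache *AC,
                            const Instruction *CxtI, const DominatorTree *DT) {
  KnownBits Known(DL.getPointerTypeSizeInBits(Addr->getType()));
  computeKnownBits(Addr, Known, DL, /*Depth=*/0, AC, CxtI, DT);
  // If at least NumLowBits trailing bits are known zero, the address is
  // aligned to 1 << NumLowBits even if its declared alignment is weaker.
  return Known.countMinTrailingZeros() >= NumLowBits;
}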
llvm::ModRefInfo
ModRefInfo
Flags indicating whether a memory access modifies or references memory.
Definition: AliasAnalysis.h:148
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
SI
StandardInstrumentations SI(Debug, VerifyEach)
llvm::IndexedInstrProf::HashT::Last
@ Last
llvm::TargetTransformInfo::isLegalToVectorizeLoadChain
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:995
llvm::generic_gep_type_iterator::getIndexedType
Type * getIndexedType() const
Definition: GetElementPtrTypeIterator.h:72
iterator_range.h
llvm::TTI
TargetTransformInfo TTI
Definition: TargetTransformInfo.h:163
llvm::WinEH::EncodingType::CE
@ CE
Windows NT (Windows on ARM)
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:517
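A sketch of screening a widened access against this hook; accessIsAllowedAndFast is a hypothetical helper, and the policy shown (require the misaligned access to be both legal and fast) is an assumption, not a statement about this pass.
static bool accessIsAllowedAndFast(const TargetTransformInfo &TTI,
                                   LLVMContext &Ctx, unsigned SzInBytes,
                                   unsigned AddrSpace, Align Alignment) {
  if (Alignment.value() % SzInBytes == 0)
    return true; // the known alignment already covers the whole access
  bool Fast = false;
  bool Allows = TTI.allowsMisalignedMemoryAccesses(Ctx, SzInBytes * 8,
                                                   AddrSpace, Alignment, &Fast);
  return Allows && Fast;
}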
llvm::SmallPtrSetImpl::count
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:382
llvm::APInt::urem
APInt urem(const APInt &RHS) const
Unsigned remainder operation.
Definition: APInt.cpp:1656
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:200
llvm::ArrayRef
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: APInt.h:32
llvm::BinaryOperator
Definition: InstrTypes.h:189
DataLayout.h
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass iff it does not: (1) add or remove basic blocks from the function, or (2) modify terminator instructions in any way.
Definition: Pass.cpp:253
llvm::StringRef
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
llvm::APInt::zextOrSelf
APInt zextOrSelf(unsigned width) const
Zero extend or truncate to width.
Definition: APInt.cpp:990
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:41
llvm::ScalarEvolution::getConstant
const SCEV * getConstant(ConstantInt *V)
Definition: ScalarEvolution.cpp:446
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:256
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:116
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:81
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::Type::isPtrOrPtrVectorTy
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition: Type.h:234
llvm::APInt::ult
bool ult(const APInt &RHS) const
Unsigned less than comparison.
Definition: APInt.h:1037
llvm::TargetTransformInfo::getLoadVectorFactor
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1016
llvm::Value::getName
StringRef getName() const
Return a constant reference to the value's name.
Definition: Value.cpp:309
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:175
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
users
iv users
Definition: IVUsers.cpp:52
llvm::APInt::udiv
APInt udiv(const APInt &RHS) const
Unsigned division operation.
Definition: APInt.cpp:1563
llvm::APInt::clearBit
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1349
llvm::TargetTransformInfo::isLegalToVectorizeStore
bool isLegalToVectorizeStore(StoreInst *SI) const
Definition: TargetTransformInfo.cpp:991
MaxDepth
static const unsigned MaxDepth
Definition: InstCombineMulDivRem.cpp:869
llvm::Value::stripAndAccumulateInBoundsConstantOffsets
const Value * stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, APInt &Offset) const
This is a wrapper around stripAndAccumulateConstantOffsets with the in-bounds requirement set to false.
Definition: Value.h:723
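A sketch of computing a constant byte distance between two pointers assumed to be in the same address space; getConstantDistance is a hypothetical helper.
static bool getConstantDistance(const Value *PtrA, const Value *PtrB,
                                const DataLayout &DL, APInt &DistInBytes) {
  unsigned IdxWidth = DL.getIndexTypeSizeInBits(PtrA->getType());
  APInt OffA(IdxWidth, 0), OffB(IdxWidth, 0);
  const Value *BaseA = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffA);
  const Value *BaseB = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffB);
  if (BaseA != BaseB)
    return false; // different bases: no constant relation derived this way
  DistInBytes = OffB - OffA; // may be negative when read as signed
  return true;
}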
llvm::APInt::zext
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:950
DEBUG_TYPE
#define DEBUG_TYPE
Definition: LoadStoreVectorizer.cpp:91
Attributes.h
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::APInt::trunc
APInt trunc(unsigned width) const
Truncate to new width.
Definition: APInt.cpp:881
llvm::KnownBits
Definition: KnownBits.h:23
llvm::Type::getIntNTy
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition: Type.cpp:207
llvm::ScalarEvolution::getMinusSCEV
const SCEV * getMinusSCEV(const SCEV *LHS, const SCEV *RHS, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Return LHS-RHS.
Definition: ScalarEvolution.cpp:4199
llvm::APInt::isMinSignedValue
bool isMinSignedValue() const
Determine if this is the smallest signed value.
Definition: APInt.h:412
llvm::StoreInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:407
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:161
llvm::ArrayRef::begin
iterator begin() const
Definition: ArrayRef.h:153
llvm::Align::value
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:207
llvm::APInt::sextOrTrunc
APInt sextOrTrunc(unsigned width) const
Sign extend or truncate to width.
Definition: APInt.cpp:976
llvm::TargetTransformInfo::getLoadStoreVecRegBitWidth
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:983
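A sketch combining this query with the isLegalToVectorizeLoadChain entry above to cap a chain's width; maxLegalChainElems and its parameters are illustrative only.
static unsigned maxLegalChainElems(const TargetTransformInfo &TTI,
                                   unsigned AddrSpace, unsigned EltSizeInBits,
                                   unsigned ChainElems, Align Alignment) {
  unsigned VecRegBits = TTI.getLoadStoreVecRegBitWidth(AddrSpace);
  unsigned MaxElems = VecRegBits / EltSizeInBits; // elements per vector register
  unsigned Elems = std::min(ChainElems, MaxElems);
  if (Elems < 2)
    return 0; // nothing worth merging
  unsigned ChainBytes = Elems * EltSizeInBits / 8;
  if (!TTI.isLegalToVectorizeLoadChain(ChainBytes, Alignment, AddrSpace))
    return 0;
  return Elems;
}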
Casting.h
llvm::post_order
iterator_range< po_iterator< T > > post_order(const T &G)
Definition: PostOrderIterator.h:188
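A sketch of the usual iteration idiom over a function's CFG; visitBlocksInPostOrder is a hypothetical driver.
static void visitBlocksInPostOrder(Function &F) {
  // Visit every reachable basic block after its depth-first successors.
  for (BasicBlock *BB : post_order(&F)) {
    // ... collect or transform the instructions of BB here ...
    (void)BB;
  }
}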
Function.h
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:738
INITIALIZE_PASS_END
INITIALIZE_PASS_END(LoadStoreVectorizerLegacyPass, DEBUG_TYPE, "Vectorize load and store instructions", false, false) Pass *llvm
Definition: LoadStoreVectorizer.cpp:230
llvm::MCID::Add
@ Add
Definition: MCInstrDesc.h:183
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:252
llvm::Pass
Pass interface - Implemented by all 'passes'.
Definition: Pass.h:91
Instructions.h
PostOrderIterator.h
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:191
llvm::User::getNumOperands
unsigned getNumOperands() const
Definition: User.h:191
SmallVector.h
User.h
llvm::StoreInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:401
llvm::Instruction::hasNoSignedWrap
bool hasNoSignedWrap() const
Determine whether the no signed wrap flag is set.
Definition: Instruction.cpp:140
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1281
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:94
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:143
llvm::ArrayRef::size
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
TargetTransformInfo.h
llvm::TargetTransformInfo::getStoreVectorFactor
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, unsigned ChainSizeInBytes, VectorType *VecTy) const
Definition: TargetTransformInfo.cpp:1023
llvm::TargetTransformInfo::isLegalToVectorizeStoreChain
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, unsigned AddrSpace) const
Definition: TargetTransformInfo.cpp:1001
Vectorize.h
DerivedTypes.h
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:44
llvm::getLoadStorePointerOperand
const Value * getLoadStorePointerOperand(const Value *V)
A helper function that returns the pointer operand of a load or store instruction.
Definition: Instructions.h:5301
llvm::ScalarEvolution::getAddExpr
const SCEV * getAddExpr(SmallVectorImpl< const SCEV * > &Ops, SCEV::NoWrapFlags Flags=SCEV::FlagAnyWrap, unsigned Depth=0)
Get a canonical add expression, or something simpler if possible.
Definition: ScalarEvolution.cpp:2419
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:298
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:172
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
raw_ostream.h
Value.h
llvm::abs
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1284
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
Debug.h
llvm::AAResults::getModRefInfo
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or reads the specified memory location.
Definition: AliasAnalysis.cpp:218
llvm::ArrayRef::end
iterator end() const
Definition: ArrayRef.h:154
llvm::Use
A Use represents the edge between a Value definition and its users.
Definition: Use.h:44
llvm::SmallPtrSetImpl::insert
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:364
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:37