LLVM  9.0.0svn
MergeICmps.cpp
Go to the documentation of this file.
1 //===- MergeICmps.cpp - Optimize chains of integer comparisons ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass turns chains of integer comparisons into memcmp (the memcmp is
10 // later typically inlined as a chain of efficient hardware comparisons). This
11 // typically benefits c++ member or nonmember operator==().
12 //
13 // The basic idea is to replace a longer chain of integer comparisons loaded
14 // from contiguous memory locations into a shorter chain of larger integer
15 // comparisons. Benefits are double:
16 // - There are less jumps, and therefore less opportunities for mispredictions
17 // and I-cache misses.
18 // - Code size is smaller, both because jumps are removed and because the
19 // encoding of a 2*n byte compare is smaller than that of two n-byte
20 // compares.
21 //
22 // Example:
23 //
24 // struct S {
25 // int a;
26 // char b;
27 // char c;
28 // uint16_t d;
29 // bool operator==(const S& o) const {
30 // return a == o.a && b == o.b && c == o.c && d == o.d;
31 // }
32 // };
33 //
34 // Is optimized as :
35 //
36 // bool S::operator==(const S& o) const {
37 // return memcmp(this, &o, 8) == 0;
38 // }
39 //
40 // Which will later be expanded (ExpandMemCmp) as a single 8-bytes icmp.
41 //
42 //===----------------------------------------------------------------------===//
43 
46 #include "llvm/Analysis/Loads.h"
49 #include "llvm/IR/Dominators.h"
50 #include "llvm/IR/Function.h"
51 #include "llvm/IR/IRBuilder.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Transforms/Scalar.h"
56 #include <algorithm>
57 #include <numeric>
58 #include <utility>
59 #include <vector>
60 
61 using namespace llvm;
62 
63 namespace {
64 
65 #define DEBUG_TYPE "mergeicmps"
66 
67 // Returns true if the instruction is a simple load or a simple store
68 static bool isSimpleLoadOrStore(const Instruction *I) {
69  if (const LoadInst *LI = dyn_cast<LoadInst>(I))
70  return LI->isSimple();
71  if (const StoreInst *SI = dyn_cast<StoreInst>(I))
72  return SI->isSimple();
73  return false;
74 }
75 
76 // A BCE atom "Binary Compare Expression Atom" represents an integer load
77 // that is a constant offset from a base value, e.g. `a` or `o.c` in the example
78 // at the top.
79 struct BCEAtom {
80  BCEAtom() = default;
81  BCEAtom(GetElementPtrInst *GEP, LoadInst *LoadI, int BaseId, APInt Offset)
82  : GEP(GEP), LoadI(LoadI), BaseId(BaseId), Offset(Offset) {}
83 
84  // We want to order BCEAtoms by (Base, Offset). However we cannot use
85  // the pointer values for Base because these are non-deterministic.
86  // To make sure that the sort order is stable, we first assign to each atom
87  // base value an index based on its order of appearance in the chain of
88  // comparisons. We call this index `BaseOrdering`. For example, for:
89  // b[3] == c[2] && a[1] == d[1] && b[4] == c[3]
90  // | block 1 | | block 2 | | block 3 |
91  // b gets assigned index 0 and a index 1, because b appears as LHS in block 1,
92  // which is before block 2.
93  // We then sort by (BaseOrdering[LHS.Base()], LHS.Offset), which is stable.
94  bool operator<(const BCEAtom &O) const {
95  return BaseId != O.BaseId ? BaseId < O.BaseId : Offset.slt(O.Offset);
96  }
97 
98  GetElementPtrInst *GEP = nullptr;
99  LoadInst *LoadI = nullptr;
100  unsigned BaseId = 0;
101  APInt Offset;
102 };
103 
104 // A class that assigns increasing ids to values in the order in which they are
105 // seen. See comment in `BCEAtom::operator<()``.
106 class BaseIdentifier {
107 public:
108  // Returns the id for value `Base`, after assigning one if `Base` has not been
109  // seen before.
110  int getBaseId(const Value *Base) {
111  assert(Base && "invalid base");
112  const auto Insertion = BaseToIndex.try_emplace(Base, Order);
113  if (Insertion.second)
114  ++Order;
115  return Insertion.first->second;
116  }
117 
118 private:
119  unsigned Order = 1;
120  DenseMap<const Value*, int> BaseToIndex;
121 };
122 
123 // If this value is a load from a constant offset w.r.t. a base address, and
124 // there are no other users of the load or address, returns the base address and
125 // the offset.
126 BCEAtom visitICmpLoadOperand(Value *const Val, BaseIdentifier &BaseId) {
127  auto *const LoadI = dyn_cast<LoadInst>(Val);
128  if (!LoadI)
129  return {};
130  LLVM_DEBUG(dbgs() << "load\n");
131  if (LoadI->isUsedOutsideOfBlock(LoadI->getParent())) {
132  LLVM_DEBUG(dbgs() << "used outside of block\n");
133  return {};
134  }
135  // Do not optimize atomic loads to non-atomic memcmp
136  if (!LoadI->isSimple()) {
137  LLVM_DEBUG(dbgs() << "volatile or atomic\n");
138  return {};
139  }
140  Value *const Addr = LoadI->getOperand(0);
141  auto *const GEP = dyn_cast<GetElementPtrInst>(Addr);
142  if (!GEP)
143  return {};
144  LLVM_DEBUG(dbgs() << "GEP\n");
145  if (GEP->isUsedOutsideOfBlock(LoadI->getParent())) {
146  LLVM_DEBUG(dbgs() << "used outside of block\n");
147  return {};
148  }
149  const auto &DL = GEP->getModule()->getDataLayout();
150  if (!isDereferenceablePointer(GEP, DL)) {
151  LLVM_DEBUG(dbgs() << "not dereferenceable\n");
152  // We need to make sure that we can do comparison in any order, so we
153  // require memory to be unconditionnally dereferencable.
154  return {};
155  }
156  APInt Offset = APInt(DL.getPointerTypeSizeInBits(GEP->getType()), 0);
157  if (!GEP->accumulateConstantOffset(DL, Offset))
158  return {};
159  return BCEAtom(GEP, LoadI, BaseId.getBaseId(GEP->getPointerOperand()),
160  Offset);
161 }
162 
163 // A basic block with a comparison between two BCE atoms, e.g. `a == o.a` in the
164 // example at the top.
165 // The block might do extra work besides the atom comparison, in which case
166 // doesOtherWork() returns true. Under some conditions, the block can be
167 // split into the atom comparison part and the "other work" part
168 // (see canSplit()).
169 // Note: the terminology is misleading: the comparison is symmetric, so there
170 // is no real {l/r}hs. What we want though is to have the same base on the
171 // left (resp. right), so that we can detect consecutive loads. To ensure this
172 // we put the smallest atom on the left.
173 class BCECmpBlock {
174  public:
175  BCECmpBlock() {}
176 
177  BCECmpBlock(BCEAtom L, BCEAtom R, int SizeBits)
178  : Lhs_(L), Rhs_(R), SizeBits_(SizeBits) {
179  if (Rhs_ < Lhs_) std::swap(Rhs_, Lhs_);
180  }
181 
182  bool IsValid() const { return Lhs_.BaseId != 0 && Rhs_.BaseId != 0; }
183 
184  // Assert the block is consistent: If valid, it should also have
185  // non-null members besides Lhs_ and Rhs_.
186  void AssertConsistent() const {
187  if (IsValid()) {
188  assert(BB);
189  assert(CmpI);
190  assert(BranchI);
191  }
192  }
193 
194  const BCEAtom &Lhs() const { return Lhs_; }
195  const BCEAtom &Rhs() const { return Rhs_; }
196  int SizeBits() const { return SizeBits_; }
197 
198  // Returns true if the block does other works besides comparison.
199  bool doesOtherWork() const;
200 
201  // Returns true if the non-BCE-cmp instructions can be separated from BCE-cmp
202  // instructions in the block.
203  bool canSplit(AliasAnalysis *AA) const;
204 
205  // Return true if this all the relevant instructions in the BCE-cmp-block can
206  // be sunk below this instruction. By doing this, we know we can separate the
207  // BCE-cmp-block instructions from the non-BCE-cmp-block instructions in the
208  // block.
209  bool canSinkBCECmpInst(const Instruction *, DenseSet<Instruction *> &,
210  AliasAnalysis *AA) const;
211 
212  // We can separate the BCE-cmp-block instructions and the non-BCE-cmp-block
213  // instructions. Split the old block and move all non-BCE-cmp-insts into the
214  // new parent block.
215  void split(BasicBlock *NewParent, AliasAnalysis *AA) const;
216 
217  // The basic block where this comparison happens.
218  BasicBlock *BB = nullptr;
219  // The ICMP for this comparison.
220  ICmpInst *CmpI = nullptr;
221  // The terminating branch.
222  BranchInst *BranchI = nullptr;
223  // The block requires splitting.
224  bool RequireSplit = false;
225 
226 private:
227  BCEAtom Lhs_;
228  BCEAtom Rhs_;
229  int SizeBits_ = 0;
230 };
231 
232 bool BCECmpBlock::canSinkBCECmpInst(const Instruction *Inst,
233  DenseSet<Instruction *> &BlockInsts,
234  AliasAnalysis *AA) const {
235  // If this instruction has side effects and its in middle of the BCE cmp block
236  // instructions, then bail for now.
237  if (Inst->mayHaveSideEffects()) {
238  // Bail if this is not a simple load or store
239  if (!isSimpleLoadOrStore(Inst))
240  return false;
241  // Disallow stores that might alias the BCE operands
242  MemoryLocation LLoc = MemoryLocation::get(Lhs_.LoadI);
243  MemoryLocation RLoc = MemoryLocation::get(Rhs_.LoadI);
244  if (isModSet(AA->getModRefInfo(Inst, LLoc)) ||
245  isModSet(AA->getModRefInfo(Inst, RLoc)))
246  return false;
247  }
248  // Make sure this instruction does not use any of the BCE cmp block
249  // instructions as operand.
250  for (auto BI : BlockInsts) {
251  if (is_contained(Inst->operands(), BI))
252  return false;
253  }
254  return true;
255 }
256 
257 void BCECmpBlock::split(BasicBlock *NewParent, AliasAnalysis *AA) const {
258  DenseSet<Instruction *> BlockInsts(
259  {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
261  for (Instruction &Inst : *BB) {
262  if (BlockInsts.count(&Inst))
263  continue;
264  assert(canSinkBCECmpInst(&Inst, BlockInsts, AA) &&
265  "Split unsplittable block");
266  // This is a non-BCE-cmp-block instruction. And it can be separated
267  // from the BCE-cmp-block instruction.
268  OtherInsts.push_back(&Inst);
269  }
270 
271  // Do the actual spliting.
272  for (Instruction *Inst : reverse(OtherInsts)) {
273  Inst->moveBefore(&*NewParent->begin());
274  }
275 }
276 
277 bool BCECmpBlock::canSplit(AliasAnalysis *AA) const {
278  DenseSet<Instruction *> BlockInsts(
279  {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
280  for (Instruction &Inst : *BB) {
281  if (!BlockInsts.count(&Inst)) {
282  if (!canSinkBCECmpInst(&Inst, BlockInsts, AA))
283  return false;
284  }
285  }
286  return true;
287 }
288 
289 bool BCECmpBlock::doesOtherWork() const {
290  AssertConsistent();
291  // All the instructions we care about in the BCE cmp block.
292  DenseSet<Instruction *> BlockInsts(
293  {Lhs_.GEP, Rhs_.GEP, Lhs_.LoadI, Rhs_.LoadI, CmpI, BranchI});
294  // TODO(courbet): Can we allow some other things ? This is very conservative.
295  // We might be able to get away with anything does not have any side
296  // effects outside of the basic block.
297  // Note: The GEPs and/or loads are not necessarily in the same block.
298  for (const Instruction &Inst : *BB) {
299  if (!BlockInsts.count(&Inst))
300  return true;
301  }
302  return false;
303 }
304 
305 // Visit the given comparison. If this is a comparison between two valid
306 // BCE atoms, returns the comparison.
307 BCECmpBlock visitICmp(const ICmpInst *const CmpI,
308  const ICmpInst::Predicate ExpectedPredicate,
309  BaseIdentifier &BaseId) {
310  // The comparison can only be used once:
311  // - For intermediate blocks, as a branch condition.
312  // - For the final block, as an incoming value for the Phi.
313  // If there are any other uses of the comparison, we cannot merge it with
314  // other comparisons as we would create an orphan use of the value.
315  if (!CmpI->hasOneUse()) {
316  LLVM_DEBUG(dbgs() << "cmp has several uses\n");
317  return {};
318  }
319  if (CmpI->getPredicate() != ExpectedPredicate)
320  return {};
321  LLVM_DEBUG(dbgs() << "cmp "
322  << (ExpectedPredicate == ICmpInst::ICMP_EQ ? "eq" : "ne")
323  << "\n");
324  auto Lhs = visitICmpLoadOperand(CmpI->getOperand(0), BaseId);
325  if (!Lhs.BaseId)
326  return {};
327  auto Rhs = visitICmpLoadOperand(CmpI->getOperand(1), BaseId);
328  if (!Rhs.BaseId)
329  return {};
330  const auto &DL = CmpI->getModule()->getDataLayout();
331  return BCECmpBlock(std::move(Lhs), std::move(Rhs),
332  DL.getTypeSizeInBits(CmpI->getOperand(0)->getType()));
333 }
334 
335 // Visit the given comparison block. If this is a comparison between two valid
336 // BCE atoms, returns the comparison.
337 BCECmpBlock visitCmpBlock(Value *const Val, BasicBlock *const Block,
338  const BasicBlock *const PhiBlock,
339  BaseIdentifier &BaseId) {
340  if (Block->empty()) return {};
341  auto *const BranchI = dyn_cast<BranchInst>(Block->getTerminator());
342  if (!BranchI) return {};
343  LLVM_DEBUG(dbgs() << "branch\n");
344  if (BranchI->isUnconditional()) {
345  // In this case, we expect an incoming value which is the result of the
346  // comparison. This is the last link in the chain of comparisons (note
347  // that this does not mean that this is the last incoming value, blocks
348  // can be reordered).
349  auto *const CmpI = dyn_cast<ICmpInst>(Val);
350  if (!CmpI) return {};
351  LLVM_DEBUG(dbgs() << "icmp\n");
352  auto Result = visitICmp(CmpI, ICmpInst::ICMP_EQ, BaseId);
353  Result.CmpI = CmpI;
354  Result.BranchI = BranchI;
355  return Result;
356  } else {
357  // In this case, we expect a constant incoming value (the comparison is
358  // chained).
359  const auto *const Const = dyn_cast<ConstantInt>(Val);
360  LLVM_DEBUG(dbgs() << "const\n");
361  if (!Const->isZero()) return {};
362  LLVM_DEBUG(dbgs() << "false\n");
363  auto *const CmpI = dyn_cast<ICmpInst>(BranchI->getCondition());
364  if (!CmpI) return {};
365  LLVM_DEBUG(dbgs() << "icmp\n");
366  assert(BranchI->getNumSuccessors() == 2 && "expecting a cond branch");
367  BasicBlock *const FalseBlock = BranchI->getSuccessor(1);
368  auto Result = visitICmp(
369  CmpI, FalseBlock == PhiBlock ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_NE,
370  BaseId);
371  Result.CmpI = CmpI;
372  Result.BranchI = BranchI;
373  return Result;
374  }
375  return {};
376 }
377 
378 static inline void enqueueBlock(std::vector<BCECmpBlock> &Comparisons,
379  BCECmpBlock &Comparison) {
380  LLVM_DEBUG(dbgs() << "Block '" << Comparison.BB->getName()
381  << "': Found cmp of " << Comparison.SizeBits()
382  << " bits between " << Comparison.Lhs().BaseId << " + "
383  << Comparison.Lhs().Offset << " and "
384  << Comparison.Rhs().BaseId << " + "
385  << Comparison.Rhs().Offset << "\n");
386  LLVM_DEBUG(dbgs() << "\n");
387  Comparisons.push_back(Comparison);
388 }
389 
390 // A chain of comparisons.
391 class BCECmpChain {
392  public:
393  BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
394  AliasAnalysis *AA);
395 
396  int size() const { return Comparisons_.size(); }
397 
398 #ifdef MERGEICMPS_DOT_ON
399  void dump() const;
400 #endif // MERGEICMPS_DOT_ON
401 
402  bool simplify(const TargetLibraryInfo *const TLI, AliasAnalysis *AA,
403  DomTreeUpdater &DTU);
404 
405 private:
406  static bool IsContiguous(const BCECmpBlock &First,
407  const BCECmpBlock &Second) {
408  return First.Lhs().BaseId == Second.Lhs().BaseId &&
409  First.Rhs().BaseId == Second.Rhs().BaseId &&
410  First.Lhs().Offset + First.SizeBits() / 8 == Second.Lhs().Offset &&
411  First.Rhs().Offset + First.SizeBits() / 8 == Second.Rhs().Offset;
412  }
413 
414  PHINode &Phi_;
415  std::vector<BCECmpBlock> Comparisons_;
416  // The original entry block (before sorting);
417  BasicBlock *EntryBlock_;
418 };
419 
420 BCECmpChain::BCECmpChain(const std::vector<BasicBlock *> &Blocks, PHINode &Phi,
421  AliasAnalysis *AA)
422  : Phi_(Phi) {
423  assert(!Blocks.empty() && "a chain should have at least one block");
424  // Now look inside blocks to check for BCE comparisons.
425  std::vector<BCECmpBlock> Comparisons;
426  BaseIdentifier BaseId;
427  for (size_t BlockIdx = 0; BlockIdx < Blocks.size(); ++BlockIdx) {
428  BasicBlock *const Block = Blocks[BlockIdx];
429  assert(Block && "invalid block");
430  BCECmpBlock Comparison = visitCmpBlock(Phi.getIncomingValueForBlock(Block),
431  Block, Phi.getParent(), BaseId);
432  Comparison.BB = Block;
433  if (!Comparison.IsValid()) {
434  LLVM_DEBUG(dbgs() << "chain with invalid BCECmpBlock, no merge.\n");
435  return;
436  }
437  if (Comparison.doesOtherWork()) {
438  LLVM_DEBUG(dbgs() << "block '" << Comparison.BB->getName()
439  << "' does extra work besides compare\n");
440  if (Comparisons.empty()) {
441  // This is the initial block in the chain, in case this block does other
442  // work, we can try to split the block and move the irrelevant
443  // instructions to the predecessor.
444  //
445  // If this is not the initial block in the chain, splitting it wont
446  // work.
447  //
448  // As once split, there will still be instructions before the BCE cmp
449  // instructions that do other work in program order, i.e. within the
450  // chain before sorting. Unless we can abort the chain at this point
451  // and start anew.
452  //
453  // NOTE: we only handle blocks a with single predecessor for now.
454  if (Comparison.canSplit(AA)) {
455  LLVM_DEBUG(dbgs()
456  << "Split initial block '" << Comparison.BB->getName()
457  << "' that does extra work besides compare\n");
458  Comparison.RequireSplit = true;
459  enqueueBlock(Comparisons, Comparison);
460  } else {
461  LLVM_DEBUG(dbgs()
462  << "ignoring initial block '" << Comparison.BB->getName()
463  << "' that does extra work besides compare\n");
464  }
465  continue;
466  }
467  // TODO(courbet): Right now we abort the whole chain. We could be
468  // merging only the blocks that don't do other work and resume the
469  // chain from there. For example:
470  // if (a[0] == b[0]) { // bb1
471  // if (a[1] == b[1]) { // bb2
472  // some_value = 3; //bb3
473  // if (a[2] == b[2]) { //bb3
474  // do a ton of stuff //bb4
475  // }
476  // }
477  // }
478  //
479  // This is:
480  //
481  // bb1 --eq--> bb2 --eq--> bb3* -eq--> bb4 --+
482  // \ \ \ \
483  // ne ne ne \
484  // \ \ \ v
485  // +------------+-----------+----------> bb_phi
486  //
487  // We can only merge the first two comparisons, because bb3* does
488  // "other work" (setting some_value to 3).
489  // We could still merge bb1 and bb2 though.
490  return;
491  }
492  enqueueBlock(Comparisons, Comparison);
493  }
494 
495  // It is possible we have no suitable comparison to merge.
496  if (Comparisons.empty()) {
497  LLVM_DEBUG(dbgs() << "chain with no BCE basic blocks, no merge\n");
498  return;
499  }
500  EntryBlock_ = Comparisons[0].BB;
501  Comparisons_ = std::move(Comparisons);
502 #ifdef MERGEICMPS_DOT_ON
503  errs() << "BEFORE REORDERING:\n\n";
504  dump();
505 #endif // MERGEICMPS_DOT_ON
506  // Reorder blocks by LHS. We can do that without changing the
507  // semantics because we are only accessing dereferencable memory.
508  llvm::sort(Comparisons_,
509  [](const BCECmpBlock &LhsBlock, const BCECmpBlock &RhsBlock) {
510  return std::tie(LhsBlock.Lhs(), LhsBlock.Rhs()) <
511  std::tie(RhsBlock.Lhs(), RhsBlock.Rhs());
512  });
513 #ifdef MERGEICMPS_DOT_ON
514  errs() << "AFTER REORDERING:\n\n";
515  dump();
516 #endif // MERGEICMPS_DOT_ON
517 }
518 
519 #ifdef MERGEICMPS_DOT_ON
520 void BCECmpChain::dump() const {
521  errs() << "digraph dag {\n";
522  errs() << " graph [bgcolor=transparent];\n";
523  errs() << " node [color=black,style=filled,fillcolor=lightyellow];\n";
524  errs() << " edge [color=black];\n";
525  for (size_t I = 0; I < Comparisons_.size(); ++I) {
526  const auto &Comparison = Comparisons_[I];
527  errs() << " \"" << I << "\" [label=\"%"
528  << Comparison.Lhs().Base()->getName() << " + "
529  << Comparison.Lhs().Offset << " == %"
530  << Comparison.Rhs().Base()->getName() << " + "
531  << Comparison.Rhs().Offset << " (" << (Comparison.SizeBits() / 8)
532  << " bytes)\"];\n";
533  const Value *const Val = Phi_.getIncomingValueForBlock(Comparison.BB);
534  if (I > 0) errs() << " \"" << (I - 1) << "\" -> \"" << I << "\";\n";
535  errs() << " \"" << I << "\" -> \"Phi\" [label=\"" << *Val << "\"];\n";
536  }
537  errs() << " \"Phi\" [label=\"Phi\"];\n";
538  errs() << "}\n\n";
539 }
540 #endif // MERGEICMPS_DOT_ON
541 
542 namespace {
543 
544 // A class to compute the name of a set of merged basic blocks.
545 // This is optimized for the common case of no block names.
546 class MergedBlockName {
547  // Storage for the uncommon case of several named blocks.
548  SmallString<16> Scratch;
549 
550 public:
551  explicit MergedBlockName(ArrayRef<BCECmpBlock> Comparisons)
552  : Name(makeName(Comparisons)) {}
553  const StringRef Name;
554 
555 private:
556  StringRef makeName(ArrayRef<BCECmpBlock> Comparisons) {
557  assert(!Comparisons.empty() && "no basic block");
558  // Fast path: only one block, or no names at all.
559  if (Comparisons.size() == 1)
560  return Comparisons[0].BB->getName();
561  const int size = std::accumulate(Comparisons.begin(), Comparisons.end(), 0,
562  [](int i, const BCECmpBlock &Cmp) {
563  return i + Cmp.BB->getName().size();
564  });
565  if (size == 0)
566  return StringRef("", 0);
567 
568  // Slow path: at least two blocks, at least one block with a name.
569  Scratch.clear();
570  // We'll have `size` bytes for name and `Comparisons.size() - 1` bytes for
571  // separators.
572  Scratch.reserve(size + Comparisons.size() - 1);
573  const auto append = [this](StringRef str) {
574  Scratch.append(str.begin(), str.end());
575  };
576  append(Comparisons[0].BB->getName());
577  for (int I = 1, E = Comparisons.size(); I < E; ++I) {
578  const BasicBlock *const BB = Comparisons[I].BB;
579  if (!BB->getName().empty()) {
580  append("+");
581  append(BB->getName());
582  }
583  }
584  return StringRef(Scratch);
585  }
586 };
587 } // namespace
588 
589 // Merges the given contiguous comparison blocks into one memcmp block.
590 static BasicBlock *mergeComparisons(ArrayRef<BCECmpBlock> Comparisons,
591  BasicBlock *const InsertBefore,
592  BasicBlock *const NextCmpBlock,
593  PHINode &Phi,
594  const TargetLibraryInfo *const TLI,
595  AliasAnalysis *AA, DomTreeUpdater &DTU) {
596  assert(!Comparisons.empty() && "merging zero comparisons");
597  LLVMContext &Context = NextCmpBlock->getContext();
598  const BCECmpBlock &FirstCmp = Comparisons[0];
599 
600  // Create a new cmp block before next cmp block.
601  BasicBlock *const BB =
602  BasicBlock::Create(Context, MergedBlockName(Comparisons).Name,
603  NextCmpBlock->getParent(), InsertBefore);
604  IRBuilder<> Builder(BB);
605  // Add the GEPs from the first BCECmpBlock.
606  Value *const Lhs = Builder.Insert(FirstCmp.Lhs().GEP->clone());
607  Value *const Rhs = Builder.Insert(FirstCmp.Rhs().GEP->clone());
608 
609  Value *IsEqual = nullptr;
610  LLVM_DEBUG(dbgs() << "Merging " << Comparisons.size() << " comparisons -> "
611  << BB->getName() << "\n");
612  if (Comparisons.size() == 1) {
613  LLVM_DEBUG(dbgs() << "Only one comparison, updating branches\n");
614  Value *const LhsLoad =
615  Builder.CreateLoad(FirstCmp.Lhs().LoadI->getType(), Lhs);
616  Value *const RhsLoad =
617  Builder.CreateLoad(FirstCmp.Rhs().LoadI->getType(), Rhs);
618  // There are no blocks to merge, just do the comparison.
619  IsEqual = Builder.CreateICmpEQ(LhsLoad, RhsLoad);
620  } else {
621  // If there is one block that requires splitting, we do it now, i.e.
622  // just before we know we will collapse the chain. The instructions
623  // can be executed before any of the instructions in the chain.
624  const auto ToSplit =
625  std::find_if(Comparisons.begin(), Comparisons.end(),
626  [](const BCECmpBlock &B) { return B.RequireSplit; });
627  if (ToSplit != Comparisons.end()) {
628  LLVM_DEBUG(dbgs() << "Splitting non_BCE work to header\n");
629  ToSplit->split(BB, AA);
630  }
631 
632  const unsigned TotalSizeBits = std::accumulate(
633  Comparisons.begin(), Comparisons.end(), 0u,
634  [](int Size, const BCECmpBlock &C) { return Size + C.SizeBits(); });
635 
636  // Create memcmp() == 0.
637  const auto &DL = Phi.getModule()->getDataLayout();
638  Value *const MemCmpCall = emitMemCmp(
639  Lhs, Rhs,
640  ConstantInt::get(DL.getIntPtrType(Context), TotalSizeBits / 8), Builder,
641  DL, TLI);
642  IsEqual = Builder.CreateICmpEQ(
643  MemCmpCall, ConstantInt::get(Type::getInt32Ty(Context), 0));
644  }
645 
646  BasicBlock *const PhiBB = Phi.getParent();
647  // Add a branch to the next basic block in the chain.
648  if (NextCmpBlock == PhiBB) {
649  // Continue to phi, passing it the comparison result.
650  Builder.CreateBr(PhiBB);
651  Phi.addIncoming(IsEqual, BB);
652  DTU.applyUpdates({{DominatorTree::Insert, BB, PhiBB}});
653  } else {
654  // Continue to next block if equal, exit to phi else.
655  Builder.CreateCondBr(IsEqual, NextCmpBlock, PhiBB);
656  Phi.addIncoming(ConstantInt::getFalse(Context), BB);
657  DTU.applyUpdates({{DominatorTree::Insert, BB, NextCmpBlock},
658  {DominatorTree::Insert, BB, PhiBB}});
659  }
660  return BB;
661 }
662 
663 bool BCECmpChain::simplify(const TargetLibraryInfo *const TLI,
664  AliasAnalysis *AA, DomTreeUpdater &DTU) {
665  assert(Comparisons_.size() >= 2 && "simplifying trivial BCECmpChain");
666  // First pass to check if there is at least one merge. If not, we don't do
667  // anything and we keep analysis passes intact.
668  const auto AtLeastOneMerged = [this]() {
669  for (size_t I = 1; I < Comparisons_.size(); ++I) {
670  if (IsContiguous(Comparisons_[I - 1], Comparisons_[I]))
671  return true;
672  }
673  return false;
674  };
675  if (!AtLeastOneMerged())
676  return false;
677 
678  LLVM_DEBUG(dbgs() << "Simplifying comparison chain starting at block "
679  << EntryBlock_->getName() << "\n");
680 
681  // Effectively merge blocks. We go in the reverse direction from the phi block
682  // so that the next block is always available to branch to.
683  const auto mergeRange = [this, TLI, AA, &DTU](int I, int Num,
684  BasicBlock *InsertBefore,
685  BasicBlock *Next) {
686  return mergeComparisons(makeArrayRef(Comparisons_).slice(I, Num),
687  InsertBefore, Next, Phi_, TLI, AA, DTU);
688  };
689  int NumMerged = 1;
690  BasicBlock *NextCmpBlock = Phi_.getParent();
691  for (int I = static_cast<int>(Comparisons_.size()) - 2; I >= 0; --I) {
692  if (IsContiguous(Comparisons_[I], Comparisons_[I + 1])) {
693  LLVM_DEBUG(dbgs() << "Merging block " << Comparisons_[I].BB->getName()
694  << " into " << Comparisons_[I + 1].BB->getName()
695  << "\n");
696  ++NumMerged;
697  } else {
698  NextCmpBlock = mergeRange(I + 1, NumMerged, NextCmpBlock, NextCmpBlock);
699  NumMerged = 1;
700  }
701  }
702  // Insert the entry block for the new chain before the old entry block.
703  // If the old entry block was the function entry, this ensures that the new
704  // entry can become the function entry.
705  NextCmpBlock = mergeRange(0, NumMerged, EntryBlock_, NextCmpBlock);
706 
707  // Replace the original cmp chain with the new cmp chain by pointing all
708  // predecessors of EntryBlock_ to NextCmpBlock instead. This makes all cmp
709  // blocks in the old chain unreachable.
710  while (!pred_empty(EntryBlock_)) {
711  BasicBlock* const Pred = *pred_begin(EntryBlock_);
712  LLVM_DEBUG(dbgs() << "Updating jump into old chain from " << Pred->getName()
713  << "\n");
714  Pred->getTerminator()->replaceUsesOfWith(EntryBlock_, NextCmpBlock);
715  DTU.applyUpdates({{DominatorTree::Delete, Pred, EntryBlock_},
716  {DominatorTree::Insert, Pred, NextCmpBlock}});
717  }
718 
719  // If the old cmp chain was the function entry, we need to update the function
720  // entry.
721  const bool ChainEntryIsFnEntry =
722  (EntryBlock_ == &EntryBlock_->getParent()->getEntryBlock());
723  if (ChainEntryIsFnEntry && DTU.hasDomTree()) {
724  LLVM_DEBUG(dbgs() << "Changing function entry from "
725  << EntryBlock_->getName() << " to "
726  << NextCmpBlock->getName() << "\n");
727  DTU.getDomTree().setNewRoot(NextCmpBlock);
728  DTU.applyUpdates({{DominatorTree::Delete, NextCmpBlock, EntryBlock_}});
729  }
730  EntryBlock_ = nullptr;
731 
732  // Delete merged blocks. This also removes incoming values in phi.
734  for (auto &Cmp : Comparisons_) {
735  LLVM_DEBUG(dbgs() << "Deleting merged block " << Cmp.BB->getName() << "\n");
736  DeadBlocks.push_back(Cmp.BB);
737  }
738  DeleteDeadBlocks(DeadBlocks, &DTU);
739 
740  Comparisons_.clear();
741  return true;
742 }
743 
744 std::vector<BasicBlock *> getOrderedBlocks(PHINode &Phi,
745  BasicBlock *const LastBlock,
746  int NumBlocks) {
747  // Walk up from the last block to find other blocks.
748  std::vector<BasicBlock *> Blocks(NumBlocks);
749  assert(LastBlock && "invalid last block");
750  BasicBlock *CurBlock = LastBlock;
751  for (int BlockIndex = NumBlocks - 1; BlockIndex > 0; --BlockIndex) {
752  if (CurBlock->hasAddressTaken()) {
753  // Somebody is jumping to the block through an address, all bets are
754  // off.
755  LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
756  << " has its address taken\n");
757  return {};
758  }
759  Blocks[BlockIndex] = CurBlock;
760  auto *SinglePredecessor = CurBlock->getSinglePredecessor();
761  if (!SinglePredecessor) {
762  // The block has two or more predecessors.
763  LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
764  << " has two or more predecessors\n");
765  return {};
766  }
767  if (Phi.getBasicBlockIndex(SinglePredecessor) < 0) {
768  // The block does not link back to the phi.
769  LLVM_DEBUG(dbgs() << "skip: block " << BlockIndex
770  << " does not link back to the phi\n");
771  return {};
772  }
773  CurBlock = SinglePredecessor;
774  }
775  Blocks[0] = CurBlock;
776  return Blocks;
777 }
778 
779 bool processPhi(PHINode &Phi, const TargetLibraryInfo *const TLI,
780  AliasAnalysis *AA, DomTreeUpdater &DTU) {
781  LLVM_DEBUG(dbgs() << "processPhi()\n");
782  if (Phi.getNumIncomingValues() <= 1) {
783  LLVM_DEBUG(dbgs() << "skip: only one incoming value in phi\n");
784  return false;
785  }
786  // We are looking for something that has the following structure:
787  // bb1 --eq--> bb2 --eq--> bb3 --eq--> bb4 --+
788  // \ \ \ \
789  // ne ne ne \
790  // \ \ \ v
791  // +------------+-----------+----------> bb_phi
792  //
793  // - The last basic block (bb4 here) must branch unconditionally to bb_phi.
794  // It's the only block that contributes a non-constant value to the Phi.
795  // - All other blocks (b1, b2, b3) must have exactly two successors, one of
796  // them being the phi block.
797  // - All intermediate blocks (bb2, bb3) must have only one predecessor.
798  // - Blocks cannot do other work besides the comparison, see doesOtherWork()
799 
800  // The blocks are not necessarily ordered in the phi, so we start from the
801  // last block and reconstruct the order.
802  BasicBlock *LastBlock = nullptr;
803  for (unsigned I = 0; I < Phi.getNumIncomingValues(); ++I) {
804  if (isa<ConstantInt>(Phi.getIncomingValue(I))) continue;
805  if (LastBlock) {
806  // There are several non-constant values.
807  LLVM_DEBUG(dbgs() << "skip: several non-constant values\n");
808  return false;
809  }
810  if (!isa<ICmpInst>(Phi.getIncomingValue(I)) ||
811  cast<ICmpInst>(Phi.getIncomingValue(I))->getParent() !=
812  Phi.getIncomingBlock(I)) {
813  // Non-constant incoming value is not from a cmp instruction or not
814  // produced by the last block. We could end up processing the value
815  // producing block more than once.
816  //
817  // This is an uncommon case, so we bail.
818  LLVM_DEBUG(
819  dbgs()
820  << "skip: non-constant value not from cmp or not from last block.\n");
821  return false;
822  }
823  LastBlock = Phi.getIncomingBlock(I);
824  }
825  if (!LastBlock) {
826  // There is no non-constant block.
827  LLVM_DEBUG(dbgs() << "skip: no non-constant block\n");
828  return false;
829  }
830  if (LastBlock->getSingleSuccessor() != Phi.getParent()) {
831  LLVM_DEBUG(dbgs() << "skip: last block non-phi successor\n");
832  return false;
833  }
834 
835  const auto Blocks =
836  getOrderedBlocks(Phi, LastBlock, Phi.getNumIncomingValues());
837  if (Blocks.empty()) return false;
838  BCECmpChain CmpChain(Blocks, Phi, AA);
839 
840  if (CmpChain.size() < 2) {
841  LLVM_DEBUG(dbgs() << "skip: only one compare block\n");
842  return false;
843  }
844 
845  return CmpChain.simplify(TLI, AA, DTU);
846 }
847 
848 class MergeICmps : public FunctionPass {
849  public:
850  static char ID;
851 
852  MergeICmps() : FunctionPass(ID) {
854  }
855 
856  bool runOnFunction(Function &F) override {
857  if (skipFunction(F)) return false;
858  const auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
859  const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
860  // MergeICmps does not need the DominatorTree, but we update it if it's
861  // already available.
862  auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
863  DomTreeUpdater DTU(DTWP ? &DTWP->getDomTree() : nullptr,
864  /*PostDominatorTree*/ nullptr,
866  AliasAnalysis *AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
867  auto PA = runImpl(F, &TLI, &TTI, AA, DTU);
868  return !PA.areAllPreserved();
869  }
870 
871  private:
872  void getAnalysisUsage(AnalysisUsage &AU) const override {
878  }
879 
881  const TargetTransformInfo *TTI, AliasAnalysis *AA,
882  DomTreeUpdater &DTU);
883 };
884 
886  const TargetTransformInfo *TTI,
887  AliasAnalysis *AA, DomTreeUpdater &DTU) {
888  LLVM_DEBUG(dbgs() << "MergeICmpsPass: " << F.getName() << "\n");
889 
890  // We only try merging comparisons if the target wants to expand memcmp later.
891  // The rationale is to avoid turning small chains into memcmp calls.
892  if (!TTI->enableMemCmpExpansion(true)) return PreservedAnalyses::all();
893 
894  // If we don't have memcmp avaiable we can't emit calls to it.
895  if (!TLI->has(LibFunc_memcmp))
896  return PreservedAnalyses::all();
897 
898  bool MadeChange = false;
899 
900  for (auto BBIt = ++F.begin(); BBIt != F.end(); ++BBIt) {
901  // A Phi operation is always first in a basic block.
902  if (auto *const Phi = dyn_cast<PHINode>(&*BBIt->begin()))
903  MadeChange |= processPhi(*Phi, TLI, AA, DTU);
904  }
905 
906  if (!MadeChange)
907  return PreservedAnalyses::all();
909  PA.preserve<GlobalsAA>();
911  return PA;
912 }
913 
914 } // namespace
915 
916 char MergeICmps::ID = 0;
917 INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps",
918  "Merge contiguous icmps into a memcmp", false, false)
923  "Merge contiguous icmps into a memcmp", false, false)
924 
925 Pass *llvm::createMergeICmpsPass() { return new MergeICmps(); }
Legacy wrapper pass to provide the GlobalsAAResult object.
Pass interface - Implemented by all &#39;passes&#39;.
Definition: Pass.h:80
void DeleteDeadBlocks(ArrayRef< BasicBlock *> BBs, DomTreeUpdater *DTU=nullptr, bool KeepOneInputPHIs=false)
Delete the specified blocks from BB.
uint64_t CallInst * C
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:594
static bool runImpl(Function &F, TargetLibraryInfo &TLI, DominatorTree &DT)
This is the entry point for all transforms.
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
raw_ostream & errs()
This returns a reference to a raw_ostream for standard error.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
LLVMContext & Context
This class represents lattice values for constants.
Definition: AllocatorList.h:23
This is the interface for a simple mod/ref and alias analysis over globals.
iterator begin() const
Definition: ArrayRef.h:136
iterator end()
Definition: Function.h:663
amdgpu Simplify well known AMD library false FunctionCallee Value const Twine & Name
Implements a dense probed hash-table based set.
Definition: DenseSet.h:249
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition: APInt.h:1203
DominatorTree & getDomTree()
Flush DomTree updates and return DomTree.
static void dump(StringRef Title, SpillInfo const &Spills)
Definition: CoroFrame.cpp:298
static std::pair< StringRef, StringRef > split(StringRef Str, char Separator)
Checked version of split, to ensure mandatory subparts.
Definition: DataLayout.cpp:203
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:230
F(f)
An instruction for reading from memory.
Definition: Instructions.h:167
Hexagon Common GEP
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.cpp:137
void reserve(size_type N)
Definition: SmallVector.h:369
LLVMContext & getContext() const
Get the context in which this basic block lives.
Definition: BasicBlock.cpp:32
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:268
mergeicmps
Definition: MergeICmps.cpp:922
AnalysisUsage & addRequired()
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:50
void append(SmallVectorImpl< char > &path, const Twine &a, const Twine &b="", const Twine &c="", const Twine &d="")
Append to path.
Definition: Path.cpp:455
const DataLayout & getDataLayout() const
Get the data layout for the module&#39;s target platform.
Definition: Module.cpp:369
int getBasicBlockIndex(const BasicBlock *BB) const
Return the first index of the specified basic block in the value list for this PHI.
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:450
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:742
Pass * createMergeICmpsPass()
Definition: MergeICmps.cpp:925
LLVM_NODISCARD bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:126
auto reverse(ContainerTy &&C, typename std::enable_if< has_rbegin< ContainerTy >::value >::type *=nullptr) -> decltype(make_range(C.rbegin(), C.rend()))
Definition: STLExtras.h:266
bool empty() const
Definition: BasicBlock.h:279
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:244
bool has(LibFunc F) const
Tests whether a library function is available.
const BasicBlock * getSingleSuccessor() const
Return the successor of this block if it has a single successor.
Definition: BasicBlock.cpp:268
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory)...
Definition: APInt.h:32
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:25
An instruction for storing to memory.
Definition: Instructions.h:320
DomTreeNodeBase< NodeT > * setNewRoot(NodeT *BB)
Add a new node to the forward dominator tree and make it a new root.
iterator begin()
Definition: Function.h:661
Value * getOperand(unsigned i) const
Definition: User.h:169
void replaceUsesOfWith(Value *From, Value *To)
Replace uses of one Value with another.
Definition: User.cpp:20
void append(in_iter S, in_iter E)
Append from an iterator pair.
Definition: SmallString.h:74
an instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
Definition: Instructions.h:873
static bool runOnFunction(Function &F, bool PostInlining)
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
void dump(const SparseBitVector< ElementSize > &LHS, raw_ostream &out)
Wrapper pass for TargetTransformInfo.
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:153
const BasicBlock * getSinglePredecessor() const
Return the predecessor of this block if it has a single predecessor block.
Definition: BasicBlock.cpp:233
LLVM Basic Block Representation.
Definition: BasicBlock.h:57
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:64
Conditional or Unconditional Branch instruction.
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:148
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Value * getIncomingValueForBlock(const BasicBlock *BB) const
bool mayHaveSideEffects() const
Return true if the instruction may have side effects.
Definition: Instruction.h:572
Interval::pred_iterator pred_begin(Interval *I)
pred_begin/pred_end - define methods so that Intervals may be used just like BasicBlocks can with the...
Definition: Interval.h:112
Represent the analysis usage information of a pass.
This instruction compares its operands according to the predicate given to the constructor.
Analysis pass providing a never-invalidated alias analysis result.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:709
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:284
op_range operands()
Definition: User.h:237
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:99
bool pred_empty(const BasicBlock *BB)
Definition: CFG.h:116
auto find_if(R &&Range, UnaryPredicate P) -> decltype(adl_begin(Range))
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly...
Definition: STLExtras.h:1213
R600 Clause Merge
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:159
Value * getIncomingValue(unsigned i) const
Return incoming value number x.
INITIALIZE_PASS_END(RegBankSelect, DEBUG_TYPE, "Assign register bank of generic virtual registers", false, false) RegBankSelect
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1115
Representation for a specific memory location.
bool hasAddressTaken() const
Returns true if there are any uses of this basic block other than direct branches, switches, etc.
Definition: BasicBlock.h:391
hexagon bit simplify
This is the shared class of boolean and integer constants.
Definition: Constants.h:83
auto size(R &&Range, typename std::enable_if< std::is_same< typename std::iterator_traits< decltype(Range.begin())>::iterator_category, std::random_access_iterator_tag >::value, void >::type *=nullptr) -> decltype(std::distance(Range.begin(), Range.end()))
Get the size of a range.
Definition: STLExtras.h:1166
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
This is a &#39;vector&#39; (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:841
Provides information about what library functions are available for the current target.
iterator end() const
Definition: ArrayRef.h:137
static Constant * get(Type *Ty, uint64_t V, bool isSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:631
unsigned getNumIncomingValues() const
Return the number of incoming edges.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:132
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:55
Class for arbitrary precision integers.
Definition: APInt.h:69
bool isDereferenceablePointer(const Value *V, const DataLayout &DL, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr)
Return true if this is always a dereferenceable pointer.
Definition: Loads.cpp:152
LLVM_NODISCARD bool isModSet(const ModRefInfo MRI)
Predicate getPredicate() const
Return the predicate for this instruction.
Definition: InstrTypes.h:784
Merge contiguous icmps into a memcmp
Definition: MergeICmps.cpp:922
static IntegerType * getInt32Ty(LLVMContext &C)
Definition: Type.cpp:175
StringRef getName() const
Return a constant reference to the value&#39;s name.
Definition: Value.cpp:214
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:106
#define I(x, y, z)
Definition: MD5.cpp:58
const MemCmpExpansionOptions * enableMemCmpExpansion(bool IsZeroCmp) const
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:332
uint32_t Size
Definition: Profile.cpp:46
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:174
size_type count(const_arg_type_t< ValueT > V) const
Return 1 if the specified key is in the set, 0 otherwise.
Definition: DenseSet.h:91
Value * emitMemCmp(Value *Ptr1, Value *Ptr2, Value *Len, IRBuilder<> &B, const DataLayout &DL, const TargetLibraryInfo *TLI)
Emit a call to the memcmp function.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool operator<(int64_t V1, const APSInt &V2)
Definition: APSInt.h:343
LLVM Value Representation.
Definition: Value.h:72
static const Function * getParent(const Value *V)
void moveBefore(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
Definition: Instruction.cpp:86
bool hasOneUse() const
Return true if there is exactly one user of this value.
Definition: Value.h:412
INITIALIZE_PASS_BEGIN(MergeICmps, "mergeicmps", "Merge contiguous icmps into a memcmp", false, false) INITIALIZE_PASS_END(MergeICmps
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:48
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:259
bool hasDomTree() const
Returns true if it holds a DominatorTree.
This pass exposes codegen information to IR-level passes.
void initializeMergeICmpsPass(PassRegistry &)
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object...
ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc)
getModRefInfo (for call sites) - Return information about whether a particular call site modifies or ...
#define LLVM_DEBUG(X)
Definition: Debug.h:122
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:143
const BasicBlock * getParent() const
Definition: Instruction.h:66
bool is_contained(R &&Range, const E &Element)
Wrapper function around std::find to detect if an element exists in a container.
Definition: STLExtras.h:1244