LLVM  17.0.0git
ExpandMemCmp.cpp
Go to the documentation of this file.
1 //===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass tries to expand memcmp() calls into optimally-sized loads and
10 // compares for the target.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/ADT/Statistic.h"
24 #include "llvm/IR/Dominators.h"
25 #include "llvm/IR/IRBuilder.h"
26 #include "llvm/InitializePasses.h"
31 #include <optional>
32 
33 using namespace llvm;
34 
35 namespace llvm {
36 class TargetLowering;
37 }
38 
39 #define DEBUG_TYPE "expandmemcmp"
40 
41 STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
42 STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
43 STATISTIC(NumMemCmpGreaterThanMax,
44  "Number of memcmp calls with size greater than max size");
45 STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
46 
48  "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
49  cl::desc("The number of loads per basic block for inline expansion of "
50  "memcmp that is only being compared against zero."));
51 
53  "max-loads-per-memcmp", cl::Hidden,
54  cl::desc("Set maximum number of loads used in expanded memcmp"));
55 
57  "max-loads-per-memcmp-opt-size", cl::Hidden,
58  cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
59 
60 namespace {
61 
62 
63 // This class provides helper functions to expand a memcmp library call into an
64 // inline expansion.
// The expansion replaces the call with a chain of "load compare" basic
// blocks, an optional "res_block" that computes the -1/+1 result, and an
// "endblock" holding the result phi.
65 class MemCmpExpansion {
// Holds the early-exit block plus the phis that collect, from each compare
// block, the pair of loaded values that differed.
66  struct ResultBlock {
67  BasicBlock *BB = nullptr;
68  PHINode *PhiSrc1 = nullptr;
69  PHINode *PhiSrc2 = nullptr;
70 
71  ResultBlock() = default;
72  };
73 
// The memcmp/bcmp call being expanded.
74  CallInst *const CI;
75  ResultBlock ResBlock;
// Constant number of bytes compared (third memcmp argument).
76  const uint64_t Size;
// Widest load (in bytes) used by the chosen decomposition.
77  unsigned MaxLoadSize = 0;
78  uint64_t NumLoadsNonOneByte = 0;
// How many loads each block handles in the ==0/!=0 (zero-equality) case.
79  const uint64_t NumLoadsPerBlockForZeroCmp;
80  std::vector<BasicBlock *> LoadCmpBlocks;
81  BasicBlock *EndBlock;
82  PHINode *PhiRes;
83  const bool IsUsedForZeroCmp;
84  const DataLayout &DL;
// May be null; when set, CFG edits are mirrored into the dominator tree.
85  DomTreeUpdater *DTU;
// NOTE(review): an IRBuilder member named `Builder` is used throughout the
// methods and initialized by the constructor, but its declaration (original
// line 86) is missing from this extraction — confirm against upstream.
87  // Represents the decomposition in blocks of the expansion. For example,
88  // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
89  // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}].
90  struct LoadEntry {
91  LoadEntry(unsigned LoadSize, uint64_t Offset)
92  : LoadSize(LoadSize), Offset(Offset) {
93  }
94 
95  // The size of the load for this block, in bytes.
96  unsigned LoadSize;
97  // The offset of this load from the base pointer, in bytes.
// NOTE(review): the `Offset` field declaration (original line 98) is
// missing from this extraction — confirm against upstream.
99  };
100  using LoadEntryVector = SmallVector<LoadEntry, 8>;
101  LoadEntryVector LoadSequence;
102 
103  void createLoadCmpBlocks();
104  void createResultBlock();
105  void setupResultBlockPHINodes();
106  void setupEndBlockPHINodes();
107  Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
108  void emitLoadCompareBlock(unsigned BlockIndex);
109  void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
110  unsigned &LoadIndex);
111  void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
112  void emitMemCmpResultBlock();
113  Value *getMemCmpExpansionZeroCase();
114  Value *getMemCmpEqZeroOneBlock();
115  Value *getMemCmpOneBlock();
// A pair of values loaded from the two memcmp operands at the same offset.
116  struct LoadPair {
117  Value *Lhs = nullptr;
118  Value *Rhs = nullptr;
119  };
120  LoadPair getLoadPair(Type *LoadSizeType, bool NeedsBSwap, Type *CmpSizeType,
121  unsigned OffsetBytes);
122 
123  static LoadEntryVector
124  computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
125  unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
126  static LoadEntryVector
127  computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
128  unsigned MaxNumLoads,
129  unsigned &NumLoadsNonOneByte);
130 
131 public:
132  MemCmpExpansion(CallInst *CI, uint64_t Size,
134  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
135  DomTreeUpdater *DTU);
136 
137  unsigned getNumBlocks();
138  uint64_t getNumLoads() const { return LoadSequence.size(); }
139 
140  Value *getMemCmpExpansion();
141 };
142 
143 MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
144  uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
145  const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
146  NumLoadsNonOneByte = 0;
147  LoadEntryVector LoadSequence;
148  uint64_t Offset = 0;
149  while (Size && !LoadSizes.empty()) {
150  const unsigned LoadSize = LoadSizes.front();
151  const uint64_t NumLoadsForThisSize = Size / LoadSize;
152  if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
153  // Do not expand if the total number of loads is larger than what the
154  // target allows. Note that it's important that we exit before completing
155  // the expansion to avoid using a ton of memory to store the expansion for
156  // large sizes.
157  return {};
158  }
159  if (NumLoadsForThisSize > 0) {
160  for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
161  LoadSequence.push_back({LoadSize, Offset});
162  Offset += LoadSize;
163  }
164  if (LoadSize > 1)
165  ++NumLoadsNonOneByte;
166  Size = Size % LoadSize;
167  }
168  LoadSizes = LoadSizes.drop_front();
169  }
170  return LoadSequence;
171 }
172 
// Covers `Size` bytes using only MaxLoadSize-wide loads:
// floor(Size / MaxLoadSize) back-to-back loads plus, when bytes remain, one
// final load shifted back so it ends exactly at byte `Size` (overlapping the
// previous load). Returns an empty sequence when the plain greedy
// decomposition already handles the case or the load budget is exceeded.
174 MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
175  const unsigned MaxLoadSize,
176  const unsigned MaxNumLoads,
177  unsigned &NumLoadsNonOneByte) {
178  // These are already handled by the greedy approach.
179  if (Size < 2 || MaxLoadSize < 2)
180  return {};
181 
182  // We try to do as many non-overlapping loads as possible starting from the
183  // beginning.
184  const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
185  assert(NumNonOverlappingLoads && "there must be at least one load");
186  // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
187  // an overlapping load.
188  Size = Size - NumNonOverlappingLoads * MaxLoadSize;
189  // Bail if we do not need an overlapping load, this is already handled by
190  // the greedy approach.
191  if (Size == 0)
192  return {};
193  // Bail if the number of loads (non-overlapping + potential overlapping one)
194  // is larger than the max allowed.
195  if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
196  return {};
197 
198  // Add non-overlapping loads.
199  LoadEntryVector LoadSequence;
200  uint64_t Offset = 0;
201  for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
202  LoadSequence.push_back({MaxLoadSize, Offset});
203  Offset += MaxLoadSize;
204  }
205 
206  // Add the last overlapping load.
// The final load starts (MaxLoadSize - Size) bytes before the current
// offset so its last byte lands exactly on the last byte to compare.
207  assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
208  LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
209  NumLoadsNonOneByte = 1;
210  return LoadSequence;
211 }
212 
213 // Initialize the basic block structure required for expansion of memcmp call
214 // with given maximum load size and memcmp size parameter.
215 // This structure includes:
216 // 1. A list of load compare blocks - LoadCmpBlocks.
217 // 2. An EndBlock, split from original instruction point, which is the block to
218 // return from.
219 // 3. ResultBlock, block to branch to for early exit when a
220 // LoadCmpBlock finds a difference.
// NOTE(review): `Options` referenced below is a constructor parameter (the
// target's MemCmpExpansionOptions) whose declaration line (original line
// 223) is missing from this extraction — confirm against upstream.
221 MemCmpExpansion::MemCmpExpansion(
222  CallInst *const CI, uint64_t Size,
224  const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
225  DomTreeUpdater *DTU)
226  : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
227  IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
228  Builder(CI) {
229  assert(Size > 0 && "zero blocks");
230  // Scale the max size down if the target can load more bytes than we need.
231  llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
232  while (!LoadSizes.empty() && LoadSizes.front() > Size) {
233  LoadSizes = LoadSizes.drop_front();
234  }
235  assert(!LoadSizes.empty() && "cannot load Size bytes");
236  MaxLoadSize = LoadSizes.front();
237  // Compute the decomposition.
238  unsigned GreedyNumLoadsNonOneByte = 0;
239  LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
240  GreedyNumLoadsNonOneByte);
241  NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
242  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
243  // If we allow overlapping loads and the load sequence is not already optimal,
244  // use overlapping loads.
// An overlapping sequence only wins when it is strictly shorter than the
// greedy one (or when the greedy computation gave up entirely).
245  if (Options.AllowOverlappingLoads &&
246  (LoadSequence.empty() || LoadSequence.size() > 2)) {
247  unsigned OverlappingNumLoadsNonOneByte = 0;
248  auto OverlappingLoads = computeOverlappingLoadSequence(
249  Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
250  if (!OverlappingLoads.empty() &&
251  (LoadSequence.empty() ||
252  OverlappingLoads.size() < LoadSequence.size())) {
253  LoadSequence = OverlappingLoads;
254  NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
255  }
256  }
257  assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
258 }
259 
260 unsigned MemCmpExpansion::getNumBlocks() {
261  if (IsUsedForZeroCmp)
262  return getNumLoads() / NumLoadsPerBlockForZeroCmp +
263  (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
264  return getNumLoads();
265 }
266 
267 void MemCmpExpansion::createLoadCmpBlocks() {
268  for (unsigned i = 0; i < getNumBlocks(); i++) {
269  BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
270  EndBlock->getParent(), EndBlock);
271  LoadCmpBlocks.push_back(BB);
272  }
273 }
274 
275 void MemCmpExpansion::createResultBlock() {
276  ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
277  EndBlock->getParent(), EndBlock);
278 }
279 
// Loads a `LoadSizeType`-sized value from each memcmp operand at byte offset
// `OffsetBytes`, constant-folding a load whose source is a constant.
// Optionally byte-swaps both values (so multi-byte words compare in memcmp's
// big-endian byte order) and zero-extends them to `CmpSizeType` when that
// differs from the load type.
280 MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
281  bool NeedsBSwap,
282  Type *CmpSizeType,
283  unsigned OffsetBytes) {
284  // Get the memory source at offset `OffsetBytes`.
285  Value *LhsSource = CI->getArgOperand(0);
286  Value *RhsSource = CI->getArgOperand(1);
287  Align LhsAlign = LhsSource->getPointerAlignment(DL);
288  Align RhsAlign = RhsSource->getPointerAlignment(DL);
289  if (OffsetBytes > 0) {
290  auto *ByteType = Type::getInt8Ty(CI->getContext());
291  LhsSource = Builder.CreateConstGEP1_64(
292  ByteType, Builder.CreateBitCast(LhsSource, ByteType->getPointerTo()),
293  OffsetBytes);
294  RhsSource = Builder.CreateConstGEP1_64(
295  ByteType, Builder.CreateBitCast(RhsSource, ByteType->getPointerTo()),
296  OffsetBytes);
// Offsetting the pointer can only weaken the known alignment.
297  LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
298  RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
299  }
300  LhsSource = Builder.CreateBitCast(LhsSource, LoadSizeType->getPointerTo());
301  RhsSource = Builder.CreateBitCast(RhsSource, LoadSizeType->getPointerTo());
302 
303  // Create a constant or a load from the source.
304  Value *Lhs = nullptr;
305  if (auto *C = dyn_cast<Constant>(LhsSource))
306  Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
307  if (!Lhs)
308  Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
309 
310  Value *Rhs = nullptr;
311  if (auto *C = dyn_cast<Constant>(RhsSource))
312  Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
313  if (!Rhs)
314  Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
315 
316  // Swap bytes if required.
// NOTE(review): `Bswap` used below is declared on original line 318 (an
// Intrinsic declaration lookup), which is missing from this extraction.
317  if (NeedsBSwap) {
319  Intrinsic::bswap, LoadSizeType);
320  Lhs = Builder.CreateCall(Bswap, Lhs);
321  Rhs = Builder.CreateCall(Bswap, Rhs);
322  }
323 
324  // Zero extend if required.
325  if (CmpSizeType != nullptr && CmpSizeType != LoadSizeType) {
326  Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
327  Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
328  }
329  return {Lhs, Rhs};
330 }
331 
332 // This function creates the IR instructions for loading and comparing 1 byte.
333 // It loads 1 byte from each source of the memcmp parameters with the given
334 // GEPIndex. It then subtracts the two loaded values and adds this result to the
335 // final phi node for selecting the memcmp result.
336 void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
337  unsigned OffsetBytes) {
338  BasicBlock *BB = LoadCmpBlocks[BlockIndex];
339  Builder.SetInsertPoint(BB);
340  const LoadPair Loads =
341  getLoadPair(Type::getInt8Ty(CI->getContext()), /*NeedsBSwap=*/false,
342  Type::getInt32Ty(CI->getContext()), OffsetBytes);
343  Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
344 
345  PhiRes->addIncoming(Diff, BB);
346 
347  if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
348  // Early exit branch if difference found to EndBlock. Otherwise, continue to
349  // next LoadCmpBlock,
350  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
351  ConstantInt::get(Diff->getType(), 0));
352  BranchInst *CmpBr =
353  BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
354  Builder.Insert(CmpBr);
355  if (DTU)
356  DTU->applyUpdates(
357  {{DominatorTree::Insert, BB, EndBlock},
358  {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
359  } else {
360  // The last block has an unconditional branch to EndBlock.
361  BranchInst *CmpBr = BranchInst::Create(EndBlock);
362  Builder.Insert(CmpBr);
363  if (DTU)
364  DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
365  }
366 }
367 
368 /// Generate an equality comparison for one or more pairs of loaded values.
369 /// This is used in the case where the memcmp() call is compared equal or not
370 /// equal to zero.
371 Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
372  unsigned &LoadIndex) {
373  assert(LoadIndex < getNumLoads() &&
374  "getCompareLoadPairs() called with no remaining loads");
375  std::vector<Value *> XorList, OrList;
376  Value *Diff = nullptr;
377 
378  const unsigned NumLoads =
379  std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
380 
381  // For a single-block expansion, start inserting before the memcmp call.
382  if (LoadCmpBlocks.empty())
383  Builder.SetInsertPoint(CI);
384  else
385  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
386 
387  Value *Cmp = nullptr;
388  // If we have multiple loads per block, we need to generate a composite
389  // comparison using xor+or. The type for the combinations is the largest load
390  // type.
391  IntegerType *const MaxLoadType =
392  NumLoads == 1 ? nullptr
393  : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
394  for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
395  const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
396  const LoadPair Loads = getLoadPair(
397  IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8),
398  /*NeedsBSwap=*/false, MaxLoadType, CurLoadEntry.Offset);
399 
400  if (NumLoads != 1) {
401  // If we have multiple loads per block, we need to generate a composite
402  // comparison using xor+or.
403  Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
404  Diff = Builder.CreateZExt(Diff, MaxLoadType);
405  XorList.push_back(Diff);
406  } else {
407  // If there's only one load per block, we just compare the loaded values.
408  Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
409  }
410  }
411 
412  auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
413  std::vector<Value *> OutList;
414  for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
415  Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
416  OutList.push_back(Or);
417  }
418  if (InList.size() % 2 != 0)
419  OutList.push_back(InList.back());
420  return OutList;
421  };
422 
423  if (!Cmp) {
424  // Pairwise OR the XOR results.
425  OrList = pairWiseOr(XorList);
426 
427  // Pairwise OR the OR results until one result left.
428  while (OrList.size() != 1) {
429  OrList = pairWiseOr(OrList);
430  }
431 
432  assert(Diff && "Failed to find comparison diff");
433  Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
434  }
435 
436  return Cmp;
437 }
438 
439 void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
440  unsigned &LoadIndex) {
441  Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
442 
443  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
444  ? EndBlock
445  : LoadCmpBlocks[BlockIndex + 1];
446  // Early exit branch if difference found to ResultBlock. Otherwise,
447  // continue to next LoadCmpBlock or EndBlock.
448  BasicBlock *BB = Builder.GetInsertBlock();
449  BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
450  Builder.Insert(CmpBr);
451  if (DTU)
452  DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
453  {DominatorTree::Insert, BB, NextBB}});
454 
455  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
456  // since early exit to ResultBlock was not taken (no difference was found in
457  // any of the bytes).
458  if (BlockIndex == LoadCmpBlocks.size() - 1) {
459  Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
460  PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
461  }
462 }
463 
464 // This function creates the IR intructions for loading and comparing using the
465 // given LoadSize. It loads the number of bytes specified by LoadSize from each
466 // source of the memcmp parameters. It then does a subtract to see if there was
467 // a difference in the loaded values. If a difference is found, it branches
468 // with an early exit to the ResultBlock for calculating which source was
469 // larger. Otherwise, it falls through to the either the next LoadCmpBlock or
470 // the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
471 // a special case through emitLoadCompareByteBlock. The special handling can
472 // simply subtract the loaded values and add it to the result phi node.
473 void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
474  // There is one load per block in this case, BlockIndex == LoadIndex.
475  const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
476 
477  if (CurLoadEntry.LoadSize == 1) {
478  MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
479  return;
480  }
481 
482  Type *LoadSizeType =
483  IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
484  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
485  assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
486 
487  Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
488 
489  const LoadPair Loads =
490  getLoadPair(LoadSizeType, /*NeedsBSwap=*/DL.isLittleEndian(), MaxLoadType,
491  CurLoadEntry.Offset);
492 
493  // Add the loaded values to the phi nodes for calculating memcmp result only
494  // if result is not used in a zero equality.
495  if (!IsUsedForZeroCmp) {
496  ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
497  ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
498  }
499 
500  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
501  BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
502  ? EndBlock
503  : LoadCmpBlocks[BlockIndex + 1];
504  // Early exit branch if difference found to ResultBlock. Otherwise, continue
505  // to next LoadCmpBlock or EndBlock.
506  BasicBlock *BB = Builder.GetInsertBlock();
507  BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
508  Builder.Insert(CmpBr);
509  if (DTU)
510  DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
511  {DominatorTree::Insert, BB, ResBlock.BB}});
512 
513  // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
514  // since early exit to ResultBlock was not taken (no difference was found in
515  // any of the bytes).
516  if (BlockIndex == LoadCmpBlocks.size() - 1) {
517  Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
518  PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
519  }
520 }
521 
522 // This function populates the ResultBlock with a sequence to calculate the
523 // memcmp result. It compares the two loaded source values and returns -1 if
524 // src1 < src2 and 1 if src1 > src2.
525 void MemCmpExpansion::emitMemCmpResultBlock() {
526  // Special case: if memcmp result is used in a zero equality, result does not
527  // need to be calculated and can simply return 1.
528  if (IsUsedForZeroCmp) {
529  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
530  Builder.SetInsertPoint(ResBlock.BB, InsertPt);
531  Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
532  PhiRes->addIncoming(Res, ResBlock.BB);
533  BranchInst *NewBr = BranchInst::Create(EndBlock);
534  Builder.Insert(NewBr);
535  if (DTU)
536  DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
537  return;
538  }
539  BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
540  Builder.SetInsertPoint(ResBlock.BB, InsertPt);
541 
542  Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
543  ResBlock.PhiSrc2);
544 
545  Value *Res =
546  Builder.CreateSelect(Cmp, ConstantInt::get(Builder.getInt32Ty(), -1),
547  ConstantInt::get(Builder.getInt32Ty(), 1));
548 
549  PhiRes->addIncoming(Res, ResBlock.BB);
550  BranchInst *NewBr = BranchInst::Create(EndBlock);
551  Builder.Insert(NewBr);
552  if (DTU)
553  DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
554 }
555 
556 void MemCmpExpansion::setupResultBlockPHINodes() {
557  Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
558  Builder.SetInsertPoint(ResBlock.BB);
559  // Note: this assumes one load per block.
560  ResBlock.PhiSrc1 =
561  Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
562  ResBlock.PhiSrc2 =
563  Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
564 }
565 
566 void MemCmpExpansion::setupEndBlockPHINodes() {
567  Builder.SetInsertPoint(&EndBlock->front());
568  PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
569 }
570 
571 Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
572  unsigned LoadIndex = 0;
573  // This loop populates each of the LoadCmpBlocks with the IR sequence to
574  // handle multiple loads per block.
575  for (unsigned I = 0; I < getNumBlocks(); ++I) {
576  emitLoadCompareBlockMultipleLoads(I, LoadIndex);
577  }
578 
579  emitMemCmpResultBlock();
580  return PhiRes;
581 }
582 
583 /// A memcmp expansion that compares equality with 0 and only has one block of
584 /// load and compare can bypass the compare, branch, and phi IR that is required
585 /// in the general case.
586 Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
587  unsigned LoadIndex = 0;
588  Value *Cmp = getCompareLoadPairs(0, LoadIndex);
589  assert(LoadIndex == getNumLoads() && "some entries were not consumed");
590  return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
591 }
592 
593 /// A memcmp expansion that only has one block of load and compare can bypass
594 /// the compare, branch, and phi IR that is required in the general case.
595 Value *MemCmpExpansion::getMemCmpOneBlock() {
596  Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
597  bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
598 
599  // The i8 and i16 cases don't need compares. We zext the loaded values and
600  // subtract them to get the suitable negative, zero, or positive i32 result.
601  if (Size < 4) {
602  const LoadPair Loads =
603  getLoadPair(LoadSizeType, NeedsBSwap, Builder.getInt32Ty(),
604  /*Offset*/ 0);
605  return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
606  }
607 
608  const LoadPair Loads = getLoadPair(LoadSizeType, NeedsBSwap, LoadSizeType,
609  /*Offset*/ 0);
610  // The result of memcmp is negative, zero, or positive, so produce that by
611  // subtracting 2 extended compare bits: sub (ugt, ult).
612  // If a target prefers to use selects to get -1/0/1, they should be able
613  // to transform this later. The inverse transform (going from selects to math)
614  // may not be possible in the DAG because the selects got converted into
615  // branches before we got there.
616  Value *CmpUGT = Builder.CreateICmpUGT(Loads.Lhs, Loads.Rhs);
617  Value *CmpULT = Builder.CreateICmpULT(Loads.Lhs, Loads.Rhs);
618  Value *ZextUGT = Builder.CreateZExt(CmpUGT, Builder.getInt32Ty());
619  Value *ZextULT = Builder.CreateZExt(CmpULT, Builder.getInt32Ty());
620  return Builder.CreateSub(ZextUGT, ZextULT);
621 }
622 
623 // This function expands the memcmp call into an inline expansion and returns
624 // the memcmp result.
625 Value *MemCmpExpansion::getMemCmpExpansion() {
626  // Create the basic block framework for a multi-block expansion.
627  if (getNumBlocks() != 1) {
628  BasicBlock *StartBlock = CI->getParent();
629  EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
630  /*MSSAU=*/nullptr, "endblock");
631  setupEndBlockPHINodes();
632  createResultBlock();
633 
634  // If return value of memcmp is not used in a zero equality, we need to
635  // calculate which source was larger. The calculation requires the
636  // two loaded source values of each load compare block.
637  // These will be saved in the phi nodes created by setupResultBlockPHINodes.
638  if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
639 
640  // Create the number of required load compare basic blocks.
641  createLoadCmpBlocks();
642 
643  // Update the terminator added by SplitBlock to branch to the first
644  // LoadCmpBlock.
645  StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
646  if (DTU)
647  DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
648  {DominatorTree::Delete, StartBlock, EndBlock}});
649  }
650 
651  Builder.SetCurrentDebugLocation(CI->getDebugLoc());
652 
653  if (IsUsedForZeroCmp)
654  return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
655  : getMemCmpExpansionZeroCase();
656 
657  if (getNumBlocks() == 1)
658  return getMemCmpOneBlock();
659 
660  for (unsigned I = 0; I < getNumBlocks(); ++I) {
661  emitLoadCompareBlock(I);
662  }
663 
664  emitMemCmpResultBlock();
665  return PhiRes;
666 }
667 
668 // This function checks to see if an expansion of memcmp can be generated.
669 // It checks for constant compare size that is less than the max inline size.
670 // If an expansion cannot occur, returns false to leave as a library call.
671 // Otherwise, the library call is replaced with a new IR instruction sequence.
672 /// We want to transform:
673 /// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
674 /// To:
675 /// loadbb:
676 /// %0 = bitcast i32* %buffer2 to i8*
677 /// %1 = bitcast i32* %buffer1 to i8*
678 /// %2 = bitcast i8* %1 to i64*
679 /// %3 = bitcast i8* %0 to i64*
680 /// %4 = load i64, i64* %2
681 /// %5 = load i64, i64* %3
682 /// %6 = call i64 @llvm.bswap.i64(i64 %4)
683 /// %7 = call i64 @llvm.bswap.i64(i64 %5)
684 /// %8 = sub i64 %6, %7
685 /// %9 = icmp ne i64 %8, 0
686 /// br i1 %9, label %res_block, label %loadbb1
687 /// res_block: ; preds = %loadbb2,
688 /// %loadbb1, %loadbb
689 /// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
690 /// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
691 /// %10 = icmp ult i64 %phi.src1, %phi.src2
692 /// %11 = select i1 %10, i32 -1, i32 1
693 /// br label %endblock
694 /// loadbb1: ; preds = %loadbb
695 /// %12 = bitcast i32* %buffer2 to i8*
696 /// %13 = bitcast i32* %buffer1 to i8*
697 /// %14 = bitcast i8* %13 to i32*
698 /// %15 = bitcast i8* %12 to i32*
699 /// %16 = getelementptr i32, i32* %14, i32 2
700 /// %17 = getelementptr i32, i32* %15, i32 2
701 /// %18 = load i32, i32* %16
702 /// %19 = load i32, i32* %17
703 /// %20 = call i32 @llvm.bswap.i32(i32 %18)
704 /// %21 = call i32 @llvm.bswap.i32(i32 %19)
705 /// %22 = zext i32 %20 to i64
706 /// %23 = zext i32 %21 to i64
707 /// %24 = sub i64 %22, %23
708 /// %25 = icmp ne i64 %24, 0
709 /// br i1 %25, label %res_block, label %loadbb2
710 /// loadbb2: ; preds = %loadbb1
711 /// %26 = bitcast i32* %buffer2 to i8*
712 /// %27 = bitcast i32* %buffer1 to i8*
713 /// %28 = bitcast i8* %27 to i16*
714 /// %29 = bitcast i8* %26 to i16*
715 /// %30 = getelementptr i16, i16* %28, i16 6
716 /// %31 = getelementptr i16, i16* %29, i16 6
717 /// %32 = load i16, i16* %30
718 /// %33 = load i16, i16* %31
719 /// %34 = call i16 @llvm.bswap.i16(i16 %32)
720 /// %35 = call i16 @llvm.bswap.i16(i16 %33)
721 /// %36 = zext i16 %34 to i64
722 /// %37 = zext i16 %35 to i64
723 /// %38 = sub i64 %36, %37
724 /// %39 = icmp ne i64 %38, 0
725 /// br i1 %39, label %res_block, label %loadbb3
726 /// loadbb3: ; preds = %loadbb2
727 /// %40 = bitcast i32* %buffer2 to i8*
728 /// %41 = bitcast i32* %buffer1 to i8*
729 /// %42 = getelementptr i8, i8* %41, i8 14
730 /// %43 = getelementptr i8, i8* %40, i8 14
731 /// %44 = load i8, i8* %42
732 /// %45 = load i8, i8* %43
733 /// %46 = zext i8 %44 to i32
734 /// %47 = zext i8 %45 to i32
735 /// %48 = sub i32 %46, %47
736 /// br label %endblock
737 /// endblock: ; preds = %res_block,
738 /// %loadbb3
739 /// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
740 /// ret i32 %phi.res
// Attempts to replace one memcmp/bcmp call with an inline expansion.
// Returns false (leaving the library call in place) when expansion is not
// profitable or not possible; returns true after replacing and erasing the
// call. NOTE(review): the PSI/BFI parameters (original line 743) and the
// right-hand sides of the IsUsedForZeroCmp/OptForSize initializers (original
// lines 765/767) are missing from this extraction — confirm upstream.
741 static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
742  const TargetLowering *TLI, const DataLayout *DL,
744  DomTreeUpdater *DTU, const bool IsBCmp) {
745  NumMemCmpCalls++;
746 
747  // Early exit from expansion if -Oz.
748  if (CI->getFunction()->hasMinSize())
749  return false;
750 
751  // Early exit from expansion if size is not a constant.
752  ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
753  if (!SizeCast) {
754  NumMemCmpNotConstant++;
755  return false;
756  }
757  const uint64_t SizeVal = SizeCast->getZExtValue();
758 
// A size-zero compare is not expanded here.
759  if (SizeVal == 0) {
760  return false;
761  }
762  // TTI call to check if target would like to expand memcmp. Also, get the
763  // available load sizes.
764  const bool IsUsedForZeroCmp =
766  bool OptForSize = CI->getFunction()->hasOptSize() ||
768  auto Options = TTI->enableMemCmpExpansion(OptForSize,
769  IsUsedForZeroCmp);
770  if (!Options) return false;
771 
// Command-line flags override the target's preferences when present.
772  if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
773  Options.NumLoadsPerBlock = MemCmpEqZeroNumLoadsPerBlock;
774 
775  if (OptForSize &&
776  MaxLoadsPerMemcmpOptSize.getNumOccurrences())
777  Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
778 
779  if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
780  Options.MaxNumLoads = MaxLoadsPerMemcmp;
781 
782  MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
783 
784  // Don't expand if this will require more loads than desired by the target.
// An empty load sequence is how MemCmpExpansion signals "over budget".
785  if (Expansion.getNumLoads() == 0) {
786  NumMemCmpGreaterThanMax++;
787  return false;
788  }
789 
790  NumMemCmpInlined++;
791 
792  Value *Res = Expansion.getMemCmpExpansion();
793 
794  // Replace call with result of expansion and erase call.
795  CI->replaceAllUsesWith(Res);
796  CI->eraseFromParent();
797 
798  return true;
799 }
800 
// Legacy-pass-manager wrapper that gathers the analyses the expansion needs
// and drives runImpl over the function. NOTE(review): the getAnalysisUsage
// body (original lines 836-841) and parts of the runImpl/runOnBlock
// declarations (original lines 844, 847, 852) are missing from this
// extraction — confirm against upstream.
801 class ExpandMemCmpPass : public FunctionPass {
802 public:
803  static char ID;
804 
805  ExpandMemCmpPass() : FunctionPass(ID) {
806  initializeExpandMemCmpPassPass(*PassRegistry::getPassRegistry());
807  }
808 
809  bool runOnFunction(Function &F) override {
810  if (skipFunction(F)) return false;
811 
// The target lowering is only reachable through the TargetPassConfig; bail
// out when the pass runs without a target machine.
812  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
813  if (!TPC) {
814  return false;
815  }
816  const TargetLowering* TL =
817  TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
818 
819  const TargetLibraryInfo *TLI =
820  &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
821  const TargetTransformInfo *TTI =
822  &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
// Block frequency info is only requested when a profile summary exists.
823  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
824  auto *BFI = (PSI && PSI->hasProfileSummary()) ?
825  &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
826  nullptr;
// The dominator tree is optional; it is updated only when available.
827  DominatorTree *DT = nullptr;
828  if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
829  DT = &DTWP->getDomTree();
830  auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
831  return !PA.areAllPreserved();
832  }
833 
834 private:
835  void getAnalysisUsage(AnalysisUsage &AU) const override {
842  }
843 
845  const TargetTransformInfo *TTI,
846  const TargetLowering *TL, ProfileSummaryInfo *PSI,
848  // Returns true if a change was made.
849  bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
850  const TargetTransformInfo *TTI, const TargetLowering *TL,
851  const DataLayout &DL, ProfileSummaryInfo *PSI,
853 };
854 
// Scans BB for a memcmp/bcmp library call and expands the first one that
// succeeds, returning true immediately — the caller restarts its iteration
// because expansion restructures the function. Returns false when nothing in
// this block was changed. NOTE(review): the BFI parameter (original line
// 859) is missing from this extraction — confirm against upstream.
855 bool ExpandMemCmpPass::runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
856  const TargetTransformInfo *TTI,
857  const TargetLowering *TL,
858  const DataLayout &DL, ProfileSummaryInfo *PSI,
860  DomTreeUpdater *DTU) {
861  for (Instruction& I : BB) {
862  CallInst *CI = dyn_cast<CallInst>(&I);
863  if (!CI) {
864  continue;
865  }
// Only calls TLI recognizes as memcmp or bcmp are candidates; bcmp is
// flagged so the expansion knows only (in)equality matters.
866  LibFunc Func;
867  if (TLI->getLibFunc(*CI, Func) &&
868  (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
869  expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
870  return true;
871  }
872  }
873  return false;
874 }
875 
// Expands every memcmp/bcmp in F, restarting from the first block after each
// successful expansion (expansion splits blocks, invalidating iteration).
// NOTE(review): the start of this definition (original lines 876-877, the
// return type/name and first parameters) and lines 880, 899, 902-903 are
// missing from this extraction — confirm against upstream.
878  const TargetTransformInfo *TTI,
879  const TargetLowering *TL, ProfileSummaryInfo *PSI,
881  std::optional<DomTreeUpdater> DTU;
882  if (DT)
883  DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
884 
885  const DataLayout& DL = F.getParent()->getDataLayout();
886  bool MadeChanges = false;
887  for (auto BBIt = F.begin(); BBIt != F.end();) {
888  if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
889  MadeChanges = true;
890  // If changes were made, restart the function from the beginning, since
891  // the structure of the function was changed.
892  BBIt = F.begin();
893  } else {
894  ++BBIt;
895  }
896  }
897  if (MadeChanges)
898  for (BasicBlock &BB : F)
900  if (!MadeChanges)
901  return PreservedAnalyses::all();
904  return PA;
905 }
906 
907 } // namespace
908 
909 char ExpandMemCmpPass::ID = 0;
910 INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp",
911  "Expand memcmp() to load/stores", false, false)
917 INITIALIZE_PASS_END(ExpandMemCmpPass, "expandmemcmp",
918  "Expand memcmp() to load/stores", false, false)
919 
921  return new ExpandMemCmpPass();
922 }
llvm::Check::Size
@ Size
Definition: FileCheck.h:77
i
i
Definition: README.txt:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to, or nullptr if the function does not have a module.
Definition: Instruction.cpp:70
llvm::Value::getPointerAlignment
Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition: Value.cpp:918
llvm::RecurKind::Or
@ Or
Bitwise or logical OR of integers.
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:114
llvm::AArch64PACKey::ID
ID
Definition: AArch64BaseInfo.h:824
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::BasicBlock::iterator
InstListType::iterator iterator
Instruction iterators...
Definition: BasicBlock.h:87
llvm::BasicBlock::getParent
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
llvm::DomTreeUpdater::applyUpdates
void applyUpdates(ArrayRef< DominatorTree::UpdateType > Updates)
Submit updates to all available trees.
Definition: DomTreeUpdater.cpp:230
expandmemcmp
expandmemcmp
Definition: ExpandMemCmp.cpp:917
MaxLoadsPerMemcmpOptSize
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
llvm::Function
Definition: Function.h:59
llvm::ProfileSummaryInfo::hasProfileSummary
bool hasProfileSummary() const
Returns true if profile summary is available.
Definition: ProfileSummaryInfo.h:69
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1199
Statistic.h
llvm::FunctionPass::skipFunction
bool skipFunction(const Function &F) const
Optional passes call this function to check whether the pass should be skipped.
Definition: Pass.cpp:174
SizeOpts.h
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
LazyBlockFrequencyInfo.h
llvm::IRBuilder<>
DomTreeUpdater.h
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
stores
hexagon widen stores
Definition: HexagonStoreWidening.cpp:118
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:138
llvm::FunctionPass::runOnFunction
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
to
Should compile to
Definition: README.txt:449
llvm::LazyBlockFrequencyInfoPass::getLazyBFIAnalysisUsage
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
Definition: LazyBlockFrequencyInfo.cpp:62
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
ConstantFolding.h
llvm::ArrayRef::empty
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
llvm::commonAlignment
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::createExpandMemCmpPass
FunctionPass * createExpandMemCmpPass()
Definition: ExpandMemCmp.cpp:920
llvm::LazyBlockFrequencyInfoPass
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
Definition: LazyBlockFrequencyInfo.h:98
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
memcmp
Expand memcmp() to load/stores"
llvm::BlockFrequencyInfo
BlockFrequencyInfo pass uses the BlockFrequencyInfoImpl implementation to estimate IR basic block frequencies.
Definition: BlockFrequencyInfo.h:37
llvm::shouldOptimizeForSize
bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
Definition: MachineSizeOpts.cpp:183
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:725
TargetMachine.h
MaxLoadsPerMemcmp
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:36
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetLowering
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Definition: TargetLowering.h:3510
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
TargetLibraryInfo.h
false
Definition: StackSlotColoring.cpp:141
llvm::IntegerType
Class to represent integer types.
Definition: DerivedTypes.h:40
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:306
llvm::Instruction
Definition: Instruction.h:41
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:314
Options
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Definition: PassBuilderBindings.cpp:48
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
llvm::DomTreeUpdater
Definition: DomTreeUpdater.h:28
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(ExpandMemCmpPass, "expandmemcmp", "Expand memcmp() to load/stores", false, false) INITIALIZE_PASS_END(ExpandMemCmpPass
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:59
llvm::ProfileSummaryInfo
Analysis providing profile information.
Definition: ProfileSummaryInfo.h:40
llvm::isOnlyUsedInZeroEqualityComparison
bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
Definition: ValueTracking.cpp:290
llvm::cl::opt
Definition: CommandLine.h:1410
llvm::Instruction::eraseFromParent
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
llvm::ArrayRef::drop_front
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition: ArrayRef.h:202
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:565
uint64_t
ProfileSummaryInfo.h
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2663
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::PreservedAnalyses::preserve
void preserve()
Mark an analysis as preserved.
Definition: PassManager.h:173
llvm::initializeExpandMemCmpPassPass
void initializeExpandMemCmpPassPass(PassRegistry &)
llvm::LegalityPredicates::all
Predicate all(Predicate P0, Predicate P1)
True iff P0 and P1 are true.
Definition: LegalizerInfo.h:228
llvm::PHINode::addIncoming
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
Definition: Instructions.h:2854
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
llvm::ProfileSummaryInfoWrapperPass
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
Definition: ProfileSummaryInfo.h:194
TargetPassConfig.h
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
load
LLVM currently emits rax rax movq rax rax ret It could narrow the loads and stores to emit rax rax movq rax rax ret The trouble is that there is a TokenFactor between the store and the load
Definition: README.txt:1531
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::TargetTransformInfo::MemCmpExpansionOptions
Returns options for expansion of memcmp. IsZeroCmp is.
Definition: TargetTransformInfo.h:782
llvm::TargetTransformInfo::enableMemCmpExpansion
MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
Definition: TargetTransformInfo.cpp:547
llvm::ArrayRef< unsigned >
llvm::min
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
llvm::Instruction::setSuccessor
void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
Definition: Instruction.cpp:847
llvm::logicalview::LVAttributeKind::Zero
@ Zero
llvm::Offset
@ Offset
Definition: DWP.cpp:406
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::Instruction::getFunction
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
TargetSubtargetInfo.h
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPUISD::BFI
@ BFI
Definition: AMDGPUISelLowering.h:433
llvm::ifs::IFSSymbolType::Func
@ Func
llvm::ArrayRef::front
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:166
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=std::nullopt)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1502
llvm::Function::hasOptSize
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:644
llvm::ConstantInt::getZExtValue
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:141
llvm::BasicBlock::front
const Instruction & front() const
Definition: BasicBlock.h:326
llvm::ConstantFoldLoadFromConstPtr
Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
Definition: ConstantFolding.cpp:729
get
Should compile to something r4 addze r3 instead we get
Definition: README.txt:24
llvm::X86::FirstMacroFusionInstKind::Cmp
@ Cmp
llvm::Type::getPointerTo
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
Definition: Type.cpp:776
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:234
runImpl
static bool runImpl(Function &F, const TargetLowering &TLI)
Definition: ExpandLargeDivRem.cpp:56
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:279
llvm::Instruction::getDebugLoc
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
Definition: Instruction.h:358
Dominators.h
llvm::CallBase::getArgOperand
Value * getArgOperand(unsigned i) const
Definition: InstrTypes.h:1351
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:90
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2708
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition: BasicBlock.h:127
llvm::Function::hasMinSize
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:641
llvm::Pass::getAnalysisUsage
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition: Pass.cpp:98
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
MemCmpEqZeroNumLoadsPerBlock
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
llvm::CallInst
This class represents a function call, abstracting a target machine's calling convention.
Definition: Instructions.h:1485
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::cl::desc
Definition: CommandLine.h:411
llvm::BranchInst
Conditional or Unconditional Branch instruction.
Definition: Instructions.h:3139
BasicBlockUtils.h
llvm::SplitBlock
BasicBlock * SplitBlock(BasicBlock *Old, Instruction *SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Definition: BasicBlockUtils.cpp:935
InitializePasses.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74