LLVM 22.0.0git
ExpandMemCmp.cpp
Go to the documentation of this file.
1//===--- ExpandMemCmp.cpp - Expand memcmp() to load/stores ----------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass tries to expand memcmp() calls into optimally-sized loads and
10// compares for the target.
11//
12//===----------------------------------------------------------------------===//
13
15#include "llvm/ADT/Statistic.h"
25#include "llvm/IR/Dominators.h"
26#include "llvm/IR/IRBuilder.h"
34#include <optional>
35
36using namespace llvm;
37using namespace llvm::PatternMatch;
38
39namespace llvm {
40class TargetLowering;
41}
42
43#define DEBUG_TYPE "expand-memcmp"
44
45STATISTIC(NumMemCmpCalls, "Number of memcmp calls");
46STATISTIC(NumMemCmpNotConstant, "Number of memcmp calls without constant size");
47STATISTIC(NumMemCmpGreaterThanMax,
48 "Number of memcmp calls with size greater than max size");
49STATISTIC(NumMemCmpInlined, "Number of inlined memcmp calls");
50
52 "memcmp-num-loads-per-block", cl::Hidden, cl::init(1),
53 cl::desc("The number of loads per basic block for inline expansion of "
54 "memcmp that is only being compared against zero."));
55
57 "max-loads-per-memcmp", cl::Hidden,
58 cl::desc("Set maximum number of loads used in expanded memcmp"));
59
61 "max-loads-per-memcmp-opt-size", cl::Hidden,
62 cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"));
63
64namespace {
65
66
67// This class provides helper functions to expand a memcmp library call into an
68// inline expansion.
69class MemCmpExpansion {
70 struct ResultBlock {
71 BasicBlock *BB = nullptr;
72 PHINode *PhiSrc1 = nullptr;
73 PHINode *PhiSrc2 = nullptr;
74
75 ResultBlock() = default;
76 };
77
78 CallInst *const CI = nullptr;
79 ResultBlock ResBlock;
80 const uint64_t Size;
81 unsigned MaxLoadSize = 0;
82 uint64_t NumLoadsNonOneByte = 0;
83 const uint64_t NumLoadsPerBlockForZeroCmp;
84 std::vector<BasicBlock *> LoadCmpBlocks;
85 BasicBlock *EndBlock = nullptr;
86 PHINode *PhiRes = nullptr;
87 const bool IsUsedForZeroCmp;
88 const DataLayout &DL;
89 DomTreeUpdater *DTU = nullptr;
90 IRBuilder<> Builder;
91 // Represents the decomposition in blocks of the expansion. For example,
92 // comparing 33 bytes on X86+sse can be done with 2x16-byte loads and
93 // 1x1-byte load, which would be represented as [{16, 0}, {16, 16}, {1, 32}.
94 struct LoadEntry {
95 LoadEntry(unsigned LoadSize, uint64_t Offset)
96 : LoadSize(LoadSize), Offset(Offset) {
97 }
98
99 // The size of the load for this block, in bytes.
100 unsigned LoadSize;
101 // The offset of this load from the base pointer, in bytes.
102 uint64_t Offset;
103 };
104 using LoadEntryVector = SmallVector<LoadEntry, 8>;
105 LoadEntryVector LoadSequence;
106
107 void createLoadCmpBlocks();
108 void createResultBlock();
109 void setupResultBlockPHINodes();
110 void setupEndBlockPHINodes();
111 Value *getCompareLoadPairs(unsigned BlockIndex, unsigned &LoadIndex);
112 void emitLoadCompareBlock(unsigned BlockIndex);
113 void emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
114 unsigned &LoadIndex);
115 void emitLoadCompareByteBlock(unsigned BlockIndex, unsigned OffsetBytes);
116 void emitMemCmpResultBlock();
117 Value *getMemCmpExpansionZeroCase();
118 Value *getMemCmpEqZeroOneBlock();
119 Value *getMemCmpOneBlock();
120 struct LoadPair {
121 Value *Lhs = nullptr;
122 Value *Rhs = nullptr;
123 };
124 LoadPair getLoadPair(Type *LoadSizeType, Type *BSwapSizeType,
125 Type *CmpSizeType, unsigned OffsetBytes);
126
127 static LoadEntryVector
128 computeGreedyLoadSequence(uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
129 unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte);
130 static LoadEntryVector
131 computeOverlappingLoadSequence(uint64_t Size, unsigned MaxLoadSize,
132 unsigned MaxNumLoads,
133 unsigned &NumLoadsNonOneByte);
134
135 static void optimiseLoadSequence(
136 LoadEntryVector &LoadSequence,
137 const TargetTransformInfo::MemCmpExpansionOptions &Options,
138 bool IsUsedForZeroCmp);
139
140public:
141 MemCmpExpansion(CallInst *CI, uint64_t Size,
142 const TargetTransformInfo::MemCmpExpansionOptions &Options,
143 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
144 DomTreeUpdater *DTU);
145
146 unsigned getNumBlocks();
147 uint64_t getNumLoads() const { return LoadSequence.size(); }
148
149 Value *getMemCmpExpansion();
150};
151
152MemCmpExpansion::LoadEntryVector MemCmpExpansion::computeGreedyLoadSequence(
153 uint64_t Size, llvm::ArrayRef<unsigned> LoadSizes,
154 const unsigned MaxNumLoads, unsigned &NumLoadsNonOneByte) {
155 NumLoadsNonOneByte = 0;
156 LoadEntryVector LoadSequence;
157 uint64_t Offset = 0;
158 while (Size && !LoadSizes.empty()) {
159 const unsigned LoadSize = LoadSizes.front();
160 const uint64_t NumLoadsForThisSize = Size / LoadSize;
161 if (LoadSequence.size() + NumLoadsForThisSize > MaxNumLoads) {
162 // Do not expand if the total number of loads is larger than what the
163 // target allows. Note that it's important that we exit before completing
164 // the expansion to avoid using a ton of memory to store the expansion for
165 // large sizes.
166 return {};
167 }
168 if (NumLoadsForThisSize > 0) {
169 for (uint64_t I = 0; I < NumLoadsForThisSize; ++I) {
170 LoadSequence.push_back({LoadSize, Offset});
171 Offset += LoadSize;
172 }
173 if (LoadSize > 1)
174 ++NumLoadsNonOneByte;
175 Size = Size % LoadSize;
176 }
177 LoadSizes = LoadSizes.drop_front();
178 }
179 return LoadSequence;
180}
181
182MemCmpExpansion::LoadEntryVector
183MemCmpExpansion::computeOverlappingLoadSequence(uint64_t Size,
184 const unsigned MaxLoadSize,
185 const unsigned MaxNumLoads,
186 unsigned &NumLoadsNonOneByte) {
187 // These are already handled by the greedy approach.
188 if (Size < 2 || MaxLoadSize < 2)
189 return {};
190
191 // We try to do as many non-overlapping loads as possible starting from the
192 // beginning.
193 const uint64_t NumNonOverlappingLoads = Size / MaxLoadSize;
194 assert(NumNonOverlappingLoads && "there must be at least one load");
195 // There remain 0 to (MaxLoadSize - 1) bytes to load, this will be done with
196 // an overlapping load.
197 Size = Size - NumNonOverlappingLoads * MaxLoadSize;
198 // Bail if we do not need an overloapping store, this is already handled by
199 // the greedy approach.
200 if (Size == 0)
201 return {};
202 // Bail if the number of loads (non-overlapping + potential overlapping one)
203 // is larger than the max allowed.
204 if ((NumNonOverlappingLoads + 1) > MaxNumLoads)
205 return {};
206
207 // Add non-overlapping loads.
208 LoadEntryVector LoadSequence;
209 uint64_t Offset = 0;
210 for (uint64_t I = 0; I < NumNonOverlappingLoads; ++I) {
211 LoadSequence.push_back({MaxLoadSize, Offset});
212 Offset += MaxLoadSize;
213 }
214
215 // Add the last overlapping load.
216 assert(Size > 0 && Size < MaxLoadSize && "broken invariant");
217 LoadSequence.push_back({MaxLoadSize, Offset - (MaxLoadSize - Size)});
218 NumLoadsNonOneByte = 1;
219 return LoadSequence;
220}
221
222void MemCmpExpansion::optimiseLoadSequence(
223 LoadEntryVector &LoadSequence,
224 const TargetTransformInfo::MemCmpExpansionOptions &Options,
225 bool IsUsedForZeroCmp) {
226 // This part of code attempts to optimize the LoadSequence by merging allowed
227 // subsequences into single loads of allowed sizes from
228 // `MemCmpExpansionOptions::AllowedTailExpansions`. If it is for zero
229 // comparison or if no allowed tail expansions are specified, we exit early.
230 if (IsUsedForZeroCmp || Options.AllowedTailExpansions.empty())
231 return;
232
233 while (LoadSequence.size() >= 2) {
234 auto Last = LoadSequence[LoadSequence.size() - 1];
235 auto PreLast = LoadSequence[LoadSequence.size() - 2];
236
237 // Exit the loop if the two sequences are not contiguous
238 if (PreLast.Offset + PreLast.LoadSize != Last.Offset)
239 break;
240
241 auto LoadSize = Last.LoadSize + PreLast.LoadSize;
242 if (find(Options.AllowedTailExpansions, LoadSize) ==
243 Options.AllowedTailExpansions.end())
244 break;
245
246 // Remove the last two sequences and replace with the combined sequence
247 LoadSequence.pop_back();
248 LoadSequence.pop_back();
249 LoadSequence.emplace_back(PreLast.Offset, LoadSize);
250 }
251}
252
253// Initialize the basic block structure required for expansion of memcmp call
254// with given maximum load size and memcmp size parameter.
255// This structure includes:
256// 1. A list of load compare blocks - LoadCmpBlocks.
257// 2. An EndBlock, split from original instruction point, which is the block to
258// return from.
259// 3. ResultBlock, block to branch to for early exit when a
260// LoadCmpBlock finds a difference.
261MemCmpExpansion::MemCmpExpansion(
262 CallInst *const CI, uint64_t Size,
263 const TargetTransformInfo::MemCmpExpansionOptions &Options,
264 const bool IsUsedForZeroCmp, const DataLayout &TheDataLayout,
265 DomTreeUpdater *DTU)
266 : CI(CI), Size(Size), NumLoadsPerBlockForZeroCmp(Options.NumLoadsPerBlock),
267 IsUsedForZeroCmp(IsUsedForZeroCmp), DL(TheDataLayout), DTU(DTU),
268 Builder(CI) {
269 assert(Size > 0 && "zero blocks");
270 // Scale the max size down if the target can load more bytes than we need.
271 llvm::ArrayRef<unsigned> LoadSizes(Options.LoadSizes);
272 while (!LoadSizes.empty() && LoadSizes.front() > Size) {
273 LoadSizes = LoadSizes.drop_front();
274 }
275 assert(!LoadSizes.empty() && "cannot load Size bytes");
276 MaxLoadSize = LoadSizes.front();
277 // Compute the decomposition.
278 unsigned GreedyNumLoadsNonOneByte = 0;
279 LoadSequence = computeGreedyLoadSequence(Size, LoadSizes, Options.MaxNumLoads,
280 GreedyNumLoadsNonOneByte);
281 NumLoadsNonOneByte = GreedyNumLoadsNonOneByte;
282 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
283 // If we allow overlapping loads and the load sequence is not already optimal,
284 // use overlapping loads.
285 if (Options.AllowOverlappingLoads &&
286 (LoadSequence.empty() || LoadSequence.size() > 2)) {
287 unsigned OverlappingNumLoadsNonOneByte = 0;
288 auto OverlappingLoads = computeOverlappingLoadSequence(
289 Size, MaxLoadSize, Options.MaxNumLoads, OverlappingNumLoadsNonOneByte);
290 if (!OverlappingLoads.empty() &&
291 (LoadSequence.empty() ||
292 OverlappingLoads.size() < LoadSequence.size())) {
293 LoadSequence = OverlappingLoads;
294 NumLoadsNonOneByte = OverlappingNumLoadsNonOneByte;
295 }
296 }
297 assert(LoadSequence.size() <= Options.MaxNumLoads && "broken invariant");
298 optimiseLoadSequence(LoadSequence, Options, IsUsedForZeroCmp);
299}
300
301unsigned MemCmpExpansion::getNumBlocks() {
302 if (IsUsedForZeroCmp)
303 return getNumLoads() / NumLoadsPerBlockForZeroCmp +
304 (getNumLoads() % NumLoadsPerBlockForZeroCmp != 0 ? 1 : 0);
305 return getNumLoads();
306}
307
308void MemCmpExpansion::createLoadCmpBlocks() {
309 for (unsigned i = 0; i < getNumBlocks(); i++) {
310 BasicBlock *BB = BasicBlock::Create(CI->getContext(), "loadbb",
311 EndBlock->getParent(), EndBlock);
312 LoadCmpBlocks.push_back(BB);
313 }
314}
315
316void MemCmpExpansion::createResultBlock() {
317 ResBlock.BB = BasicBlock::Create(CI->getContext(), "res_block",
318 EndBlock->getParent(), EndBlock);
319}
320
321MemCmpExpansion::LoadPair MemCmpExpansion::getLoadPair(Type *LoadSizeType,
322 Type *BSwapSizeType,
323 Type *CmpSizeType,
324 unsigned OffsetBytes) {
325 // Get the memory source at offset `OffsetBytes`.
326 Value *LhsSource = CI->getArgOperand(0);
327 Value *RhsSource = CI->getArgOperand(1);
328 Align LhsAlign = LhsSource->getPointerAlignment(DL);
329 Align RhsAlign = RhsSource->getPointerAlignment(DL);
330 if (OffsetBytes > 0) {
331 auto *ByteType = Type::getInt8Ty(CI->getContext());
332 LhsSource = Builder.CreateConstGEP1_64(ByteType, LhsSource, OffsetBytes);
333 RhsSource = Builder.CreateConstGEP1_64(ByteType, RhsSource, OffsetBytes);
334 LhsAlign = commonAlignment(LhsAlign, OffsetBytes);
335 RhsAlign = commonAlignment(RhsAlign, OffsetBytes);
336 }
337
338 // Create a constant or a load from the source.
339 Value *Lhs = nullptr;
340 if (auto *C = dyn_cast<Constant>(LhsSource))
341 Lhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
342 if (!Lhs)
343 Lhs = Builder.CreateAlignedLoad(LoadSizeType, LhsSource, LhsAlign);
344
345 Value *Rhs = nullptr;
346 if (auto *C = dyn_cast<Constant>(RhsSource))
347 Rhs = ConstantFoldLoadFromConstPtr(C, LoadSizeType, DL);
348 if (!Rhs)
349 Rhs = Builder.CreateAlignedLoad(LoadSizeType, RhsSource, RhsAlign);
350
351 // Zero extend if Byte Swap intrinsic has different type
352 if (BSwapSizeType && LoadSizeType != BSwapSizeType) {
353 Lhs = Builder.CreateZExt(Lhs, BSwapSizeType);
354 Rhs = Builder.CreateZExt(Rhs, BSwapSizeType);
355 }
356
357 // Swap bytes if required.
358 if (BSwapSizeType) {
360 CI->getModule(), Intrinsic::bswap, BSwapSizeType);
361 Lhs = Builder.CreateCall(Bswap, Lhs);
362 Rhs = Builder.CreateCall(Bswap, Rhs);
363 }
364
365 // Zero extend if required.
366 if (CmpSizeType != nullptr && CmpSizeType != Lhs->getType()) {
367 Lhs = Builder.CreateZExt(Lhs, CmpSizeType);
368 Rhs = Builder.CreateZExt(Rhs, CmpSizeType);
369 }
370 return {Lhs, Rhs};
371}
372
373// This function creates the IR instructions for loading and comparing 1 byte.
374// It loads 1 byte from each source of the memcmp parameters with the given
375// GEPIndex. It then subtracts the two loaded values and adds this result to the
376// final phi node for selecting the memcmp result.
377void MemCmpExpansion::emitLoadCompareByteBlock(unsigned BlockIndex,
378 unsigned OffsetBytes) {
379 BasicBlock *BB = LoadCmpBlocks[BlockIndex];
380 Builder.SetInsertPoint(BB);
381 const LoadPair Loads =
382 getLoadPair(Type::getInt8Ty(CI->getContext()), nullptr,
383 Type::getInt32Ty(CI->getContext()), OffsetBytes);
384 Value *Diff = Builder.CreateSub(Loads.Lhs, Loads.Rhs);
385
386 PhiRes->addIncoming(Diff, BB);
387
388 if (BlockIndex < (LoadCmpBlocks.size() - 1)) {
389 // Early exit branch if difference found to EndBlock. Otherwise, continue to
390 // next LoadCmpBlock,
391 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_NE, Diff,
392 ConstantInt::get(Diff->getType(), 0));
393 BranchInst *CmpBr =
394 BranchInst::Create(EndBlock, LoadCmpBlocks[BlockIndex + 1], Cmp);
395 Builder.Insert(CmpBr);
396 if (DTU)
397 DTU->applyUpdates(
398 {{DominatorTree::Insert, BB, EndBlock},
399 {DominatorTree::Insert, BB, LoadCmpBlocks[BlockIndex + 1]}});
400 } else {
401 // The last block has an unconditional branch to EndBlock.
402 BranchInst *CmpBr = BranchInst::Create(EndBlock);
403 Builder.Insert(CmpBr);
404 if (DTU)
405 DTU->applyUpdates({{DominatorTree::Insert, BB, EndBlock}});
406 }
407}
408
409/// Generate an equality comparison for one or more pairs of loaded values.
410/// This is used in the case where the memcmp() call is compared equal or not
411/// equal to zero.
412Value *MemCmpExpansion::getCompareLoadPairs(unsigned BlockIndex,
413 unsigned &LoadIndex) {
414 assert(LoadIndex < getNumLoads() &&
415 "getCompareLoadPairs() called with no remaining loads");
416 std::vector<Value *> XorList, OrList;
417 Value *Diff = nullptr;
418
419 const unsigned NumLoads =
420 std::min(getNumLoads() - LoadIndex, NumLoadsPerBlockForZeroCmp);
421
422 // For a single-block expansion, start inserting before the memcmp call.
423 if (LoadCmpBlocks.empty())
424 Builder.SetInsertPoint(CI);
425 else
426 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
427
428 Value *Cmp = nullptr;
429 // If we have multiple loads per block, we need to generate a composite
430 // comparison using xor+or. The type for the combinations is the largest load
431 // type.
432 IntegerType *const MaxLoadType =
433 NumLoads == 1 ? nullptr
434 : IntegerType::get(CI->getContext(), MaxLoadSize * 8);
435
436 for (unsigned i = 0; i < NumLoads; ++i, ++LoadIndex) {
437 const LoadEntry &CurLoadEntry = LoadSequence[LoadIndex];
438 const LoadPair Loads = getLoadPair(
439 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8), nullptr,
440 MaxLoadType, CurLoadEntry.Offset);
441
442 if (NumLoads != 1) {
443 // If we have multiple loads per block, we need to generate a composite
444 // comparison using xor+or.
445 Diff = Builder.CreateXor(Loads.Lhs, Loads.Rhs);
446 Diff = Builder.CreateZExt(Diff, MaxLoadType);
447 XorList.push_back(Diff);
448 } else {
449 // If there's only one load per block, we just compare the loaded values.
450 Cmp = Builder.CreateICmpNE(Loads.Lhs, Loads.Rhs);
451 }
452 }
453
454 auto pairWiseOr = [&](std::vector<Value *> &InList) -> std::vector<Value *> {
455 std::vector<Value *> OutList;
456 for (unsigned i = 0; i < InList.size() - 1; i = i + 2) {
457 Value *Or = Builder.CreateOr(InList[i], InList[i + 1]);
458 OutList.push_back(Or);
459 }
460 if (InList.size() % 2 != 0)
461 OutList.push_back(InList.back());
462 return OutList;
463 };
464
465 if (!Cmp) {
466 // Pairwise OR the XOR results.
467 OrList = pairWiseOr(XorList);
468
469 // Pairwise OR the OR results until one result left.
470 while (OrList.size() != 1) {
471 OrList = pairWiseOr(OrList);
472 }
473
474 assert(Diff && "Failed to find comparison diff");
475 Cmp = Builder.CreateICmpNE(OrList[0], ConstantInt::get(Diff->getType(), 0));
476 }
477
478 return Cmp;
479}
480
481void MemCmpExpansion::emitLoadCompareBlockMultipleLoads(unsigned BlockIndex,
482 unsigned &LoadIndex) {
483 Value *Cmp = getCompareLoadPairs(BlockIndex, LoadIndex);
484
485 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
486 ? EndBlock
487 : LoadCmpBlocks[BlockIndex + 1];
488 // Early exit branch if difference found to ResultBlock. Otherwise,
489 // continue to next LoadCmpBlock or EndBlock.
490 BasicBlock *BB = Builder.GetInsertBlock();
491 BranchInst *CmpBr = BranchInst::Create(ResBlock.BB, NextBB, Cmp);
493 CI->getFunction());
494 Builder.Insert(CmpBr);
495 if (DTU)
496 DTU->applyUpdates({{DominatorTree::Insert, BB, ResBlock.BB},
497 {DominatorTree::Insert, BB, NextBB}});
498
499 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
500 // since early exit to ResultBlock was not taken (no difference was found in
501 // any of the bytes).
502 if (BlockIndex == LoadCmpBlocks.size() - 1) {
503 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
504 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
505 }
506}
507
508// This function creates the IR intructions for loading and comparing using the
509// given LoadSize. It loads the number of bytes specified by LoadSize from each
510// source of the memcmp parameters. It then does a subtract to see if there was
511// a difference in the loaded values. If a difference is found, it branches
512// with an early exit to the ResultBlock for calculating which source was
513// larger. Otherwise, it falls through to the either the next LoadCmpBlock or
514// the EndBlock if this is the last LoadCmpBlock. Loading 1 byte is handled with
515// a special case through emitLoadCompareByteBlock. The special handling can
516// simply subtract the loaded values and add it to the result phi node.
517void MemCmpExpansion::emitLoadCompareBlock(unsigned BlockIndex) {
518 // There is one load per block in this case, BlockIndex == LoadIndex.
519 const LoadEntry &CurLoadEntry = LoadSequence[BlockIndex];
520
521 if (CurLoadEntry.LoadSize == 1) {
522 MemCmpExpansion::emitLoadCompareByteBlock(BlockIndex, CurLoadEntry.Offset);
523 return;
524 }
525
526 Type *LoadSizeType =
527 IntegerType::get(CI->getContext(), CurLoadEntry.LoadSize * 8);
528 Type *BSwapSizeType =
529 DL.isLittleEndian()
531 PowerOf2Ceil(CurLoadEntry.LoadSize * 8))
532 : nullptr;
533 Type *MaxLoadType = IntegerType::get(
534 CI->getContext(),
535 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(CurLoadEntry.LoadSize)) * 8);
536 assert(CurLoadEntry.LoadSize <= MaxLoadSize && "Unexpected load type");
537
538 Builder.SetInsertPoint(LoadCmpBlocks[BlockIndex]);
539
540 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
541 CurLoadEntry.Offset);
542
543 // Add the loaded values to the phi nodes for calculating memcmp result only
544 // if result is not used in a zero equality.
545 if (!IsUsedForZeroCmp) {
546 ResBlock.PhiSrc1->addIncoming(Loads.Lhs, LoadCmpBlocks[BlockIndex]);
547 ResBlock.PhiSrc2->addIncoming(Loads.Rhs, LoadCmpBlocks[BlockIndex]);
548 }
549
550 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, Loads.Lhs, Loads.Rhs);
551 BasicBlock *NextBB = (BlockIndex == (LoadCmpBlocks.size() - 1))
552 ? EndBlock
553 : LoadCmpBlocks[BlockIndex + 1];
554 // Early exit branch if difference found to ResultBlock. Otherwise, continue
555 // to next LoadCmpBlock or EndBlock.
556 BasicBlock *BB = Builder.GetInsertBlock();
557 BranchInst *CmpBr = BranchInst::Create(NextBB, ResBlock.BB, Cmp);
559 CI->getFunction());
560 Builder.Insert(CmpBr);
561 if (DTU)
562 DTU->applyUpdates({{DominatorTree::Insert, BB, NextBB},
563 {DominatorTree::Insert, BB, ResBlock.BB}});
564
565 // Add a phi edge for the last LoadCmpBlock to Endblock with a value of 0
566 // since early exit to ResultBlock was not taken (no difference was found in
567 // any of the bytes).
568 if (BlockIndex == LoadCmpBlocks.size() - 1) {
569 Value *Zero = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 0);
570 PhiRes->addIncoming(Zero, LoadCmpBlocks[BlockIndex]);
571 }
572}
573
574// This function populates the ResultBlock with a sequence to calculate the
575// memcmp result. It compares the two loaded source values and returns -1 if
576// src1 < src2 and 1 if src1 > src2.
577void MemCmpExpansion::emitMemCmpResultBlock() {
578 // Special case: if memcmp result is used in a zero equality, result does not
579 // need to be calculated and can simply return 1.
580 if (IsUsedForZeroCmp) {
581 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
582 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
583 Value *Res = ConstantInt::get(Type::getInt32Ty(CI->getContext()), 1);
584 PhiRes->addIncoming(Res, ResBlock.BB);
585 BranchInst *NewBr = BranchInst::Create(EndBlock);
586 Builder.Insert(NewBr);
587 if (DTU)
588 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
589 return;
590 }
591 BasicBlock::iterator InsertPt = ResBlock.BB->getFirstInsertionPt();
592 Builder.SetInsertPoint(ResBlock.BB, InsertPt);
593
594 Value *Cmp = Builder.CreateICmp(ICmpInst::ICMP_ULT, ResBlock.PhiSrc1,
595 ResBlock.PhiSrc2);
596
597 Value *Res =
598 Builder.CreateSelect(Cmp, Constant::getAllOnesValue(Builder.getInt32Ty()),
599 ConstantInt::get(Builder.getInt32Ty(), 1));
601 DEBUG_TYPE, CI->getFunction());
602
603 PhiRes->addIncoming(Res, ResBlock.BB);
604 BranchInst *NewBr = BranchInst::Create(EndBlock);
605 Builder.Insert(NewBr);
606 if (DTU)
607 DTU->applyUpdates({{DominatorTree::Insert, ResBlock.BB, EndBlock}});
608}
609
610void MemCmpExpansion::setupResultBlockPHINodes() {
611 Type *MaxLoadType = IntegerType::get(CI->getContext(), MaxLoadSize * 8);
612 Builder.SetInsertPoint(ResBlock.BB);
613 // Note: this assumes one load per block.
614 ResBlock.PhiSrc1 =
615 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src1");
616 ResBlock.PhiSrc2 =
617 Builder.CreatePHI(MaxLoadType, NumLoadsNonOneByte, "phi.src2");
618}
619
620void MemCmpExpansion::setupEndBlockPHINodes() {
621 Builder.SetInsertPoint(EndBlock, EndBlock->begin());
622 PhiRes = Builder.CreatePHI(Type::getInt32Ty(CI->getContext()), 2, "phi.res");
623}
624
625Value *MemCmpExpansion::getMemCmpExpansionZeroCase() {
626 unsigned LoadIndex = 0;
627 // This loop populates each of the LoadCmpBlocks with the IR sequence to
628 // handle multiple loads per block.
629 for (unsigned I = 0; I < getNumBlocks(); ++I) {
630 emitLoadCompareBlockMultipleLoads(I, LoadIndex);
631 }
632
633 emitMemCmpResultBlock();
634 return PhiRes;
635}
636
637/// A memcmp expansion that compares equality with 0 and only has one block of
638/// load and compare can bypass the compare, branch, and phi IR that is required
639/// in the general case.
640Value *MemCmpExpansion::getMemCmpEqZeroOneBlock() {
641 unsigned LoadIndex = 0;
642 Value *Cmp = getCompareLoadPairs(0, LoadIndex);
643 assert(LoadIndex == getNumLoads() && "some entries were not consumed");
644 return Builder.CreateZExt(Cmp, Type::getInt32Ty(CI->getContext()));
645}
646
647/// A memcmp expansion that only has one block of load and compare can bypass
648/// the compare, branch, and phi IR that is required in the general case.
649/// This function also analyses users of memcmp, and if there is only one user
650/// from which we can conclude that only 2 out of 3 memcmp outcomes really
651/// matter, then it generates more efficient code with only one comparison.
652Value *MemCmpExpansion::getMemCmpOneBlock() {
653 bool NeedsBSwap = DL.isLittleEndian() && Size != 1;
654 Type *LoadSizeType = IntegerType::get(CI->getContext(), Size * 8);
655 Type *BSwapSizeType =
656 NeedsBSwap ? IntegerType::get(CI->getContext(), PowerOf2Ceil(Size * 8))
657 : nullptr;
658 Type *MaxLoadType =
660 std::max(MaxLoadSize, (unsigned)PowerOf2Ceil(Size)) * 8);
661
662 // The i8 and i16 cases don't need compares. We zext the loaded values and
663 // subtract them to get the suitable negative, zero, or positive i32 result.
664 if (Size == 1 || Size == 2) {
665 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType,
666 Builder.getInt32Ty(), /*Offset*/ 0);
667 return Builder.CreateSub(Loads.Lhs, Loads.Rhs);
668 }
669
670 const LoadPair Loads = getLoadPair(LoadSizeType, BSwapSizeType, MaxLoadType,
671 /*Offset*/ 0);
672
673 // If a user of memcmp cares only about two outcomes, for example:
674 // bool result = memcmp(a, b, NBYTES) > 0;
675 // We can generate more optimal code with a smaller number of operations
676 if (CI->hasOneUser()) {
677 auto *UI = cast<Instruction>(*CI->user_begin());
678 CmpPredicate Pred = ICmpInst::Predicate::BAD_ICMP_PREDICATE;
679 bool NeedsZExt = false;
680 // This is a special case because instead of checking if the result is less
681 // than zero:
682 // bool result = memcmp(a, b, NBYTES) < 0;
683 // Compiler is clever enough to generate the following code:
684 // bool result = memcmp(a, b, NBYTES) >> 31;
685 if (match(UI,
686 m_LShr(m_Value(),
687 m_SpecificInt(CI->getType()->getIntegerBitWidth() - 1)))) {
688 Pred = ICmpInst::ICMP_SLT;
689 NeedsZExt = true;
690 } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SGT, m_Specific(CI),
691 m_AllOnes()))) {
692 // Adjust predicate as if it compared with 0.
693 Pred = ICmpInst::ICMP_SGE;
694 } else if (match(UI, m_SpecificICmp(ICmpInst::ICMP_SLT, m_Specific(CI),
695 m_One()))) {
696 // Adjust predicate as if it compared with 0.
697 Pred = ICmpInst::ICMP_SLE;
698 } else {
699 // In case of a successful match this call will set `Pred` variable
700 match(UI, m_ICmp(Pred, m_Specific(CI), m_Zero()));
701 }
702 // Generate new code and remove the original memcmp call and the user
703 if (ICmpInst::isSigned(Pred)) {
705 Loads.Lhs, Loads.Rhs);
706 auto *Result = NeedsZExt ? Builder.CreateZExt(Cmp, UI->getType()) : Cmp;
707 UI->replaceAllUsesWith(Result);
708 UI->eraseFromParent();
709 CI->eraseFromParent();
710 return nullptr;
711 }
712 }
713
714 // The result of memcmp is negative, zero, or positive.
715 return Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::ucmp,
716 {Loads.Lhs, Loads.Rhs});
717}
718
719// This function expands the memcmp call into an inline expansion and returns
720// the memcmp result. Returns nullptr if the memcmp is already replaced.
721Value *MemCmpExpansion::getMemCmpExpansion() {
722 // Create the basic block framework for a multi-block expansion.
723 if (getNumBlocks() != 1) {
724 BasicBlock *StartBlock = CI->getParent();
725 EndBlock = SplitBlock(StartBlock, CI, DTU, /*LI=*/nullptr,
726 /*MSSAU=*/nullptr, "endblock");
727 setupEndBlockPHINodes();
728 createResultBlock();
729
730 // If return value of memcmp is not used in a zero equality, we need to
731 // calculate which source was larger. The calculation requires the
732 // two loaded source values of each load compare block.
733 // These will be saved in the phi nodes created by setupResultBlockPHINodes.
734 if (!IsUsedForZeroCmp) setupResultBlockPHINodes();
735
736 // Create the number of required load compare basic blocks.
737 createLoadCmpBlocks();
738
739 // Update the terminator added by SplitBlock to branch to the first
740 // LoadCmpBlock.
741 StartBlock->getTerminator()->setSuccessor(0, LoadCmpBlocks[0]);
742 if (DTU)
743 DTU->applyUpdates({{DominatorTree::Insert, StartBlock, LoadCmpBlocks[0]},
744 {DominatorTree::Delete, StartBlock, EndBlock}});
745 }
746
748
749 if (IsUsedForZeroCmp)
750 return getNumBlocks() == 1 ? getMemCmpEqZeroOneBlock()
751 : getMemCmpExpansionZeroCase();
752
753 if (getNumBlocks() == 1)
754 return getMemCmpOneBlock();
755
756 for (unsigned I = 0; I < getNumBlocks(); ++I) {
757 emitLoadCompareBlock(I);
758 }
759
760 emitMemCmpResultBlock();
761 return PhiRes;
762}
763
764// This function checks to see if an expansion of memcmp can be generated.
765// It checks for constant compare size that is less than the max inline size.
766// If an expansion cannot occur, returns false to leave as a library call.
767// Otherwise, the library call is replaced with a new IR instruction sequence.
768/// We want to transform:
769/// %call = call signext i32 @memcmp(i8* %0, i8* %1, i64 15)
770/// To:
771/// loadbb:
772/// %0 = bitcast i32* %buffer2 to i8*
773/// %1 = bitcast i32* %buffer1 to i8*
774/// %2 = bitcast i8* %1 to i64*
775/// %3 = bitcast i8* %0 to i64*
776/// %4 = load i64, i64* %2
777/// %5 = load i64, i64* %3
778/// %6 = call i64 @llvm.bswap.i64(i64 %4)
779/// %7 = call i64 @llvm.bswap.i64(i64 %5)
780/// %8 = sub i64 %6, %7
781/// %9 = icmp ne i64 %8, 0
782/// br i1 %9, label %res_block, label %loadbb1
783/// res_block: ; preds = %loadbb2,
784/// %loadbb1, %loadbb
785/// %phi.src1 = phi i64 [ %6, %loadbb ], [ %22, %loadbb1 ], [ %36, %loadbb2 ]
786/// %phi.src2 = phi i64 [ %7, %loadbb ], [ %23, %loadbb1 ], [ %37, %loadbb2 ]
787/// %10 = icmp ult i64 %phi.src1, %phi.src2
788/// %11 = select i1 %10, i32 -1, i32 1
789/// br label %endblock
790/// loadbb1: ; preds = %loadbb
791/// %12 = bitcast i32* %buffer2 to i8*
792/// %13 = bitcast i32* %buffer1 to i8*
793/// %14 = bitcast i8* %13 to i32*
794/// %15 = bitcast i8* %12 to i32*
795/// %16 = getelementptr i32, i32* %14, i32 2
796/// %17 = getelementptr i32, i32* %15, i32 2
797/// %18 = load i32, i32* %16
798/// %19 = load i32, i32* %17
799/// %20 = call i32 @llvm.bswap.i32(i32 %18)
800/// %21 = call i32 @llvm.bswap.i32(i32 %19)
801/// %22 = zext i32 %20 to i64
802/// %23 = zext i32 %21 to i64
803/// %24 = sub i64 %22, %23
804/// %25 = icmp ne i64 %24, 0
805/// br i1 %25, label %res_block, label %loadbb2
806/// loadbb2: ; preds = %loadbb1
807/// %26 = bitcast i32* %buffer2 to i8*
808/// %27 = bitcast i32* %buffer1 to i8*
809/// %28 = bitcast i8* %27 to i16*
810/// %29 = bitcast i8* %26 to i16*
811/// %30 = getelementptr i16, i16* %28, i16 6
812/// %31 = getelementptr i16, i16* %29, i16 6
813/// %32 = load i16, i16* %30
814/// %33 = load i16, i16* %31
815/// %34 = call i16 @llvm.bswap.i16(i16 %32)
816/// %35 = call i16 @llvm.bswap.i16(i16 %33)
817/// %36 = zext i16 %34 to i64
818/// %37 = zext i16 %35 to i64
819/// %38 = sub i64 %36, %37
820/// %39 = icmp ne i64 %38, 0
821/// br i1 %39, label %res_block, label %loadbb3
822/// loadbb3: ; preds = %loadbb2
823/// %40 = bitcast i32* %buffer2 to i8*
824/// %41 = bitcast i32* %buffer1 to i8*
825/// %42 = getelementptr i8, i8* %41, i8 14
826/// %43 = getelementptr i8, i8* %40, i8 14
827/// %44 = load i8, i8* %42
828/// %45 = load i8, i8* %43
829/// %46 = zext i8 %44 to i32
830/// %47 = zext i8 %45 to i32
831/// %48 = sub i32 %46, %47
832/// br label %endblock
833/// endblock: ; preds = %res_block,
834/// %loadbb3
835/// %phi.res = phi i32 [ %48, %loadbb3 ], [ %11, %res_block ]
836/// ret i32 %phi.res
837static bool expandMemCmp(CallInst *CI, const TargetTransformInfo *TTI,
838 const TargetLowering *TLI, const DataLayout *DL,
839 ProfileSummaryInfo *PSI, BlockFrequencyInfo *BFI,
840 DomTreeUpdater *DTU, const bool IsBCmp) {
841 NumMemCmpCalls++;
842
843 // Early exit from expansion if -Oz.
844 if (CI->getFunction()->hasMinSize())
845 return false;
846
847 // Early exit from expansion if size is not a constant.
848 ConstantInt *SizeCast = dyn_cast<ConstantInt>(CI->getArgOperand(2));
849 if (!SizeCast) {
850 NumMemCmpNotConstant++;
851 return false;
852 }
853 const uint64_t SizeVal = SizeCast->getZExtValue();
854
855 if (SizeVal == 0) {
856 return false;
857 }
858 // TTI call to check if target would like to expand memcmp. Also, get the
859 // available load sizes.
860 const bool IsUsedForZeroCmp =
862 bool OptForSize = llvm::shouldOptimizeForSize(CI->getParent(), PSI, BFI);
863 auto Options = TTI->enableMemCmpExpansion(OptForSize,
864 IsUsedForZeroCmp);
865 if (!Options) return false;
866
867 if (MemCmpEqZeroNumLoadsPerBlock.getNumOccurrences())
869
870 if (OptForSize &&
871 MaxLoadsPerMemcmpOptSize.getNumOccurrences())
872 Options.MaxNumLoads = MaxLoadsPerMemcmpOptSize;
873
874 if (!OptForSize && MaxLoadsPerMemcmp.getNumOccurrences())
875 Options.MaxNumLoads = MaxLoadsPerMemcmp;
876
877 MemCmpExpansion Expansion(CI, SizeVal, Options, IsUsedForZeroCmp, *DL, DTU);
878
879 // Don't expand if this will require more loads than desired by the target.
880 if (Expansion.getNumLoads() == 0) {
881 NumMemCmpGreaterThanMax++;
882 return false;
883 }
884
885 NumMemCmpInlined++;
886
887 if (Value *Res = Expansion.getMemCmpExpansion()) {
888 // Replace call with result of expansion and erase call.
889 CI->replaceAllUsesWith(Res);
890 CI->eraseFromParent();
891 }
892
893 return true;
894}
895
896// Returns true if a change was made.
897static bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
898 const TargetTransformInfo *TTI, const TargetLowering *TL,
899 const DataLayout &DL, ProfileSummaryInfo *PSI,
900 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU);
901
902static PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
903 const TargetTransformInfo *TTI,
904 const TargetLowering *TL,
905 ProfileSummaryInfo *PSI,
906 BlockFrequencyInfo *BFI, DominatorTree *DT);
907
908class ExpandMemCmpLegacyPass : public FunctionPass {
909public:
910 static char ID;
911
912 ExpandMemCmpLegacyPass() : FunctionPass(ID) {
914 }
915
916 bool runOnFunction(Function &F) override {
917 if (skipFunction(F)) return false;
918
919 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
920 if (!TPC) {
921 return false;
922 }
923 const TargetLowering* TL =
924 TPC->getTM<TargetMachine>().getSubtargetImpl(F)->getTargetLowering();
925
926 const TargetLibraryInfo *TLI =
927 &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
928 const TargetTransformInfo *TTI =
929 &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
930 auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
931 auto *BFI = (PSI && PSI->hasProfileSummary()) ?
932 &getAnalysis<LazyBlockFrequencyInfoPass>().getBFI() :
933 nullptr;
934 DominatorTree *DT = nullptr;
935 if (auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>())
936 DT = &DTWP->getDomTree();
937 auto PA = runImpl(F, TLI, TTI, TL, PSI, BFI, DT);
938 return !PA.areAllPreserved();
939 }
940
941private:
942 void getAnalysisUsage(AnalysisUsage &AU) const override {
943 AU.addRequired<TargetLibraryInfoWrapperPass>();
944 AU.addRequired<TargetTransformInfoWrapperPass>();
945 AU.addRequired<ProfileSummaryInfoWrapperPass>();
946 AU.addPreserved<DominatorTreeWrapperPass>();
948 FunctionPass::getAnalysisUsage(AU);
949 }
950};
951
952bool runOnBlock(BasicBlock &BB, const TargetLibraryInfo *TLI,
953 const TargetTransformInfo *TTI, const TargetLowering *TL,
954 const DataLayout &DL, ProfileSummaryInfo *PSI,
955 BlockFrequencyInfo *BFI, DomTreeUpdater *DTU) {
956 for (Instruction &I : BB) {
957 CallInst *CI = dyn_cast<CallInst>(&I);
958 if (!CI) {
959 continue;
960 }
961 LibFunc Func;
962 if (TLI->getLibFunc(*CI, Func) &&
963 (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
964 expandMemCmp(CI, TTI, TL, &DL, PSI, BFI, DTU, Func == LibFunc_bcmp)) {
965 return true;
966 }
967 }
968 return false;
969}
970
971PreservedAnalyses runImpl(Function &F, const TargetLibraryInfo *TLI,
972 const TargetTransformInfo *TTI,
973 const TargetLowering *TL, ProfileSummaryInfo *PSI,
974 BlockFrequencyInfo *BFI, DominatorTree *DT) {
975 std::optional<DomTreeUpdater> DTU;
976 if (DT)
977 DTU.emplace(DT, DomTreeUpdater::UpdateStrategy::Lazy);
978
979 const DataLayout& DL = F.getDataLayout();
980 bool MadeChanges = false;
981 for (auto BBIt = F.begin(); BBIt != F.end();) {
982 if (runOnBlock(*BBIt, TLI, TTI, TL, DL, PSI, BFI, DTU ? &*DTU : nullptr)) {
983 MadeChanges = true;
984 // If changes were made, restart the function from the beginning, since
985 // the structure of the function was changed.
986 BBIt = F.begin();
987 } else {
988 ++BBIt;
989 }
990 }
991 if (MadeChanges)
992 for (BasicBlock &BB : F)
994 if (!MadeChanges)
995 return PreservedAnalyses::all();
996 PreservedAnalyses PA;
997 PA.preserve<DominatorTreeAnalysis>();
998 return PA;
999}
1000
1001} // namespace
1002
1005 const auto *TL = TM->getSubtargetImpl(F)->getTargetLowering();
1006 const auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
1007 const auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
1008 auto *PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
1009 .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
1010 BlockFrequencyInfo *BFI = (PSI && PSI->hasProfileSummary())
1011 ? &FAM.getResult<BlockFrequencyAnalysis>(F)
1012 : nullptr;
1013 auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
1014
1015 return runImpl(F, &TLI, &TTI, TL, PSI, BFI, DT);
1016}
1017
1018char ExpandMemCmpLegacyPass::ID = 0;
1019INITIALIZE_PASS_BEGIN(ExpandMemCmpLegacyPass, DEBUG_TYPE,
1020 "Expand memcmp() to load/stores", false, false)
1026INITIALIZE_PASS_END(ExpandMemCmpLegacyPass, DEBUG_TYPE,
1027 "Expand memcmp() to load/stores", false, false)
1028
1030 return new ExpandMemCmpLegacyPass();
1031}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
DXIL Intrinsic Expansion
static bool runOnFunction(Function &F, bool PostInlining)
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
Definition ExpandFp.cpp:994
static cl::opt< unsigned > MaxLoadsPerMemcmpOptSize("max-loads-per-memcmp-opt-size", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp for -Os/Oz"))
static cl::opt< unsigned > MaxLoadsPerMemcmp("max-loads-per-memcmp", cl::Hidden, cl::desc("Set maximum number of loads used in expanded memcmp"))
static cl::opt< unsigned > MemCmpEqZeroNumLoadsPerBlock("memcmp-num-loads-per-block", cl::Hidden, cl::init(1), cl::desc("The number of loads per basic block for inline expansion of " "memcmp that is only being compared against zero."))
#define DEBUG_TYPE
static LVOptions Options
Definition LVOptions.cpp:25
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
Target-Independent Code Generator Pass Configuration Options pass.
This pass exposes codegen information to IR-level passes.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
ArrayRef< T > drop_front(size_t N=1) const
Drop the first N elements of the array.
Definition ArrayRef.h:195
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:459
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::iterator iterator
Instruction iterators...
Definition BasicBlock.h:170
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
Analysis pass which computes BlockFrequencyInfo.
BlockFrequencyInfo pass uses BlockFrequencyInfoImpl implementation to estimate IR basic block frequen...
static BranchInst * Create(BasicBlock *IfTrue, InsertPosition InsertBefore=nullptr)
Value * getArgOperand(unsigned i) const
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
Legacy analysis pass which computes a DominatorTree.
Definition Dominators.h:321
PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM)
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
Predicate getUnsignedPredicate() const
For example, EQ->EQ, SLE->ULE, UGT->UGT, etc.
Value * CreateConstGEP1_64(Type *Ty, Value *Ptr, uint64_t Idx0, const Twine &Name="")
Definition IRBuilder.h:1986
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1867
LLVM_ABI Value * CreateSelect(Value *C, Value *True, Value *False, const Twine &Name="", Instruction *MDFrom=nullptr)
IntegerType * getInt32Ty()
Fetch the type representing a 32-bit integer.
Definition IRBuilder.h:562
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2336
LLVM_ABI CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, FMFSource FMFSource={}, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2497
InstTy * Insert(InstTy *I, const Twine &Name="") const
Insert and return the specified instruction.
Definition IRBuilder.h:172
Value * CreateSub(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1420
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2085
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2511
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1599
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2442
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1573
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setSuccessor(unsigned Idx, BasicBlock *BB)
Update the specified successor to point at the provided block.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
This is an alternative analysis pass to BlockFrequencyInfoWrapperPass.
static void getLazyBFIAnalysisUsage(AnalysisUsage &AU)
Helper for client passes to set up the analysis usage on behalf of this pass.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static LLVM_ABI PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
An analysis pass based on legacy pass manager to deliver ProfileSummaryInfo.
bool hasProfileSummary() const
Returns true if profile summary is available.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Wrapper pass for TargetTransformInfo.
LLVM_ABI MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const
LLVM_ABI unsigned getIntegerBitWidth() const
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
user_iterator user_begin()
Definition Value.h:402
LLVM_ABI bool hasOneUser() const
Return true if there is exactly one user of this value.
Definition Value.cpp:166
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:546
LLVM_ABI Align getPointerAlignment(const DataLayout &DL) const
Returns an alignment of the pointer value.
Definition Value.cpp:956
LLVM_ABI LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.cpp:1099
const ParentTy * getParent() const
Definition ilist_node.h:34
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
cst_pred_ty< is_all_ones > m_AllOnes()
Match an integer or vector with all bits set.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
initializer< Ty > init(const Ty &Val)
NodeAddr< FuncNode * > Func
Definition RDFGraph.h:393
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
auto find(R &&Range, const T &Val)
Provide wrappers to std::find which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1751
LLVM_ABI bool isOnlyUsedInZeroEqualityComparison(const Instruction *CxtI)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
LLVM_ABI bool shouldOptimizeForSize(const MachineFunction *MF, ProfileSummaryInfo *PSI, const MachineBlockFrequencyInfo *BFI, PGSOQueryType QueryType=PGSOQueryType::Other)
Returns true if machine function MF is suggested to be size-optimized based on the profile.
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:721
uint64_t PowerOf2Ceil(uint64_t A)
Returns the power of two which is greater than or equal to the given value.
Definition MathExtras.h:385
LLVM_ABI void initializeExpandMemCmpLegacyPassPass(PassRegistry &)
LLVM_ABI FunctionPass * createExpandMemCmpLegacyPass()
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Or
Bitwise or logical OR of integers.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="", bool Before=false)
Split the specified block at the specified instruction.
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI Constant * ConstantFoldLoadFromConstPtr(Constant *C, Type *Ty, APInt Offset, const DataLayout &DL)
Return the value that a load from C with offset Offset would produce if it is constant and determinab...
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.